diff --git a/Cargo.lock b/Cargo.lock index 7e45525f0..7610e5884 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1077,6 +1077,7 @@ dependencies = [ "typed-arena", "typst-macros", "unicode-bidi", + "unicode-math", "unicode-script", "unicode-segmentation", "unicode-xid", diff --git a/Cargo.toml b/Cargo.toml index aa63a4a72..03bc717ec 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,6 +39,7 @@ usvg = { version = "0.22", default-features = false } # External implementation of user-facing features syntect = { version = "5", default-features = false, features = ["default-syntaxes", "regex-fancy"] } rex = { git = "https://github.com/laurmaedje/ReX" } +unicode-math = { git = "https://github.com/s3bk/unicode-math/" } lipsum = { git = "https://github.com/reknih/lipsum" } csv = "1" serde_json = "1" diff --git a/src/eval/mod.rs b/src/eval/mod.rs index 7ef5bf764..5bbac77e2 100644 --- a/src/eval/mod.rs +++ b/src/eval/mod.rs @@ -33,6 +33,7 @@ pub use value::*; pub use vm::*; use std::collections::BTreeMap; +use std::sync::Arc; use comemo::{Track, Tracked}; use unicode_segmentation::UnicodeSegmentation; @@ -251,14 +252,86 @@ impl Eval for RawNode { } } -impl Eval for Spanned { +impl Eval for Math { type Output = Content; + fn eval(&self, vm: &mut Vm) -> SourceResult { + let nodes = + self.nodes().map(|node| node.eval(vm)).collect::>()?; + Ok(Content::show(library::math::MathNode::Row( + Arc::new(nodes), + self.span(), + ))) + } +} + +impl Eval for MathNode { + type Output = library::math::MathNode; + + fn eval(&self, vm: &mut Vm) -> SourceResult { + Ok(match self { + Self::Space => library::math::MathNode::Space, + Self::Linebreak => library::math::MathNode::Linebreak, + Self::Atom(atom) => library::math::MathNode::Atom(atom.clone()), + Self::Script(node) => node.eval(vm)?, + Self::Frac(node) => node.eval(vm)?, + Self::Align(node) => node.eval(vm)?, + Self::Group(node) => library::math::MathNode::Row( + Arc::new( + node.nodes() + .map(|node| node.eval(vm)) + .collect::>()?, + ), + node.span(), + ), + Self::Expr(expr) => match expr.eval(vm)?.display() { + Content::Text(text) => library::math::MathNode::Atom(text), + _ => bail!(expr.span(), "expected text"), + }, + }) + } +} + +impl Eval for ScriptNode { + type Output = library::math::MathNode; + + fn eval(&self, vm: &mut Vm) -> SourceResult { + Ok(library::math::MathNode::Script(Arc::new( + library::math::ScriptNode { + base: self.base().eval(vm)?, + sub: self + .sub() + .map(|node| node.eval(vm)) + .transpose()? + .map(|node| node.unparen()), + sup: self + .sup() + .map(|node| node.eval(vm)) + .transpose()? + .map(|node| node.unparen()), + }, + ))) + } +} + +impl Eval for FracNode { + type Output = library::math::MathNode; + + fn eval(&self, vm: &mut Vm) -> SourceResult { + Ok(library::math::MathNode::Frac(Arc::new( + library::math::FracNode { + num: self.num().eval(vm)?.unparen(), + denom: self.denom().eval(vm)?.unparen(), + }, + ))) + } +} + +impl Eval for AlignNode { + type Output = library::math::MathNode; + fn eval(&self, _: &mut Vm) -> SourceResult { - Ok(Content::show(library::math::MathNode { - formula: self.clone().map(|math| math.formula), - display: self.v.display, - })) + Ok(library::math::MathNode::Align(self.count())) } } diff --git a/src/library/math/frac.rs b/src/library/math/frac.rs new file mode 100644 index 000000000..791fd19a2 --- /dev/null +++ b/src/library/math/frac.rs @@ -0,0 +1,17 @@ +use super::*; +use crate::library::prelude::*; + +/// A fraction in a mathematical formula. +#[derive(Debug, Hash)] +pub struct FracNode { + /// The numerator. + pub num: MathNode, + /// The denominator. + pub denom: MathNode, +} + +impl Texify for FracNode { + fn texify(&self) -> EcoString { + format_eco!("\\frac{{{}}}{{{}}}", self.num.texify(), self.denom.texify()) + } +} diff --git a/src/library/math/mod.rs b/src/library/math/mod.rs index 1f5ea8f37..7b5fdf522 100644 --- a/src/library/math/mod.rs +++ b/src/library/math/mod.rs @@ -1,19 +1,39 @@ //! Mathematical formulas. -mod rex; +mod frac; +mod script; +pub use frac::*; +pub use script::*; + +use rex::error::{Error, LayoutError}; +use rex::font::FontContext; +use rex::layout::{LayoutSettings, Style}; +use rex::parser::color::RGBA; +use rex::render::{Backend, Cursor, Renderer}; + +use crate::font::Font; use crate::library::layout::BlockSpacing; use crate::library::prelude::*; -use crate::library::text::FontFamily; -use crate::syntax::Spanned; +use crate::library::text::{variant, FontFamily, Lang, TextNode}; -/// A mathematical formula. -#[derive(Debug, Hash)] -pub struct MathNode { - /// The formula. - pub formula: Spanned, - /// Whether the formula is display-level. - pub display: bool, +/// A piece of a mathematical formula. +#[derive(Debug, Clone, Hash)] +pub enum MathNode { + /// Whitespace. + Space, + /// A forced line break. + Linebreak, + /// An atom in a math formula: `x`, `+`, `12`. + Atom(EcoString), + /// A base with optional sub and superscripts: `a_1^2`. + Script(Arc), + /// A fraction: `x/2`. + Frac(Arc), + /// A numbered math alignment indicator: `&`, `&&`. + Align(usize), + /// A row of mathematical material. + Row(Arc>, Span), } #[node(showable)] @@ -28,41 +48,54 @@ impl MathNode { #[property(resolve, shorthand(around))] pub const BELOW: Option = Some(Ratio::one().into()); - fn construct(_: &mut Vm, args: &mut Args) -> SourceResult { - Ok(Content::show(Self { - formula: args.expect("formula")?, - display: args.named("display")?.unwrap_or(false), - })) + fn construct(_: &mut Vm, _: &mut Args) -> SourceResult { + todo!() + } +} + +impl MathNode { + /// Whether this is a display-style node. + pub fn display(&self) -> bool { + match self { + Self::Row(row, _) => { + matches!(row.as_slice(), [MathNode::Space, .., MathNode::Space]) + } + _ => false, + } + } + + /// Strip parentheses from the node. + pub fn unparen(self) -> Self { + if let Self::Row(row, span) = &self { + if let [MathNode::Atom(l), .., MathNode::Atom(r)] = row.as_slice() { + if l == "(" && r == ")" { + let inner = row[1 .. row.len() - 1].to_vec(); + return Self::Row(Arc::new(inner), *span); + } + } + } + + self } } impl Show for MathNode { fn unguard(&self, _: Selector) -> ShowNode { - Self { formula: self.formula.clone(), ..*self }.pack() + ShowNode::new(self.clone()) } fn encode(&self, _: StyleChain) -> Dict { - dict! { - "formula" => Value::Str(self.formula.v.clone().into()), - "display" => Value::Bool(self.display) - } + todo!() } - fn realize( - &self, - _: Tracked, - styles: StyleChain, - ) -> SourceResult { - let node = self::rex::RexNode { - tex: self.formula.clone(), - display: self.display, - family: styles.get(Self::FAMILY).clone(), - }; - - Ok(if self.display { - Content::block(node.pack().aligned(Spec::with_x(Some(Align::Center.into())))) + fn realize(&self, _: Tracked, _: StyleChain) -> SourceResult { + Ok(if self.display() { + Content::block( + LayoutNode::new(self.clone()) + .aligned(Spec::with_x(Some(Align::Center.into()))), + ) } else { - Content::inline(node) + Content::inline(self.clone()) }) } @@ -75,10 +108,197 @@ impl Show for MathNode { let mut map = StyleMap::new(); map.set_family(styles.get(Self::FAMILY).clone(), styles); - if self.display { + if self.display() { realized = realized.spaced(styles.get(Self::ABOVE), styles.get(Self::BELOW)); } Ok(realized.styled_with_map(map)) } } + +impl Layout for MathNode { + fn layout( + &self, + world: Tracked, + _: &Regions, + styles: StyleChain, + ) -> SourceResult> { + let style = if self.display() { Style::Display } else { Style::Text }; + let span = match self { + &Self::Row(_, span) => span, + _ => Span::detached(), + }; + Ok(vec![layout_tex(world, self, span, style, styles)?]) + } +} + +/// Layout a TeX formula into a frame. +fn layout_tex( + world: Tracked, + node: &dyn Texify, + span: Span, + style: Style, + styles: StyleChain, +) -> SourceResult { + let tex = node.texify(); + + // Load the font. + let font = world + .book() + .select(styles.get(MathNode::FAMILY).as_str(), variant(styles)) + .and_then(|id| world.font(id)) + .ok_or("failed to find math font") + .at(span)?; + + // Prepare the font context. + let ctx = font + .math() + .map(|math| FontContext::new(font.ttf(), math)) + .ok_or("font is not suitable for math") + .at(span)?; + + // Layout the formula. + let em = styles.get(TextNode::SIZE); + let settings = LayoutSettings::new(&ctx, em.to_pt(), style); + let renderer = Renderer::new(); + let layout = renderer + .layout(&tex, settings) + .map_err(|err| match err { + Error::Parse(err) => err.to_string(), + Error::Layout(LayoutError::Font(err)) => err.to_string(), + }) + .at(span)?; + + // Determine the metrics. + let (x0, y0, x1, y1) = renderer.size(&layout); + let width = Length::pt(x1 - x0); + let mut top = Length::pt(y1); + let mut bottom = Length::pt(-y0); + if style != Style::Display { + let metrics = font.metrics(); + top = styles.get(TextNode::TOP_EDGE).resolve(styles, metrics); + bottom = -styles.get(TextNode::BOTTOM_EDGE).resolve(styles, metrics); + }; + + // Prepare a frame rendering backend. + let size = Size::new(width, top + bottom); + let mut backend = FrameBackend { + frame: { + let mut frame = Frame::new(size); + frame.set_baseline(top); + frame.apply_role(Role::Formula); + frame + }, + baseline: top, + font: font.clone(), + fill: styles.get(TextNode::FILL), + lang: styles.get(TextNode::LANG), + colors: vec![], + }; + + // Render into the frame. + renderer.render(&layout, &mut backend); + Ok(backend.frame) +} + +/// A ReX rendering backend that renders into a frame. +struct FrameBackend { + frame: Frame, + baseline: Length, + font: Font, + fill: Paint, + lang: Lang, + colors: Vec, +} + +impl FrameBackend { + /// The currently active fill paint. + fn fill(&self) -> Paint { + self.colors + .last() + .map(|&RGBA(r, g, b, a)| RgbaColor::new(r, g, b, a).into()) + .unwrap_or(self.fill) + } + + /// Convert a cursor to a point. + fn transform(&self, cursor: Cursor) -> Point { + Point::new(Length::pt(cursor.x), self.baseline + Length::pt(cursor.y)) + } +} + +impl Backend for FrameBackend { + fn symbol(&mut self, pos: Cursor, gid: u16, scale: f64) { + self.frame.push( + self.transform(pos), + Element::Text(Text { + font: self.font.clone(), + size: Length::pt(scale), + fill: self.fill(), + lang: self.lang, + glyphs: vec![Glyph { + id: gid, + x_advance: Em::new(0.0), + x_offset: Em::new(0.0), + c: ' ', + }], + }), + ); + } + + fn rule(&mut self, pos: Cursor, width: f64, height: f64) { + self.frame.push( + self.transform(pos), + Element::Shape(Shape { + geometry: Geometry::Rect(Size::new( + Length::pt(width), + Length::pt(height), + )), + fill: Some(self.fill()), + stroke: None, + }), + ); + } + + fn begin_color(&mut self, color: RGBA) { + self.colors.push(color); + } + + fn end_color(&mut self) { + self.colors.pop(); + } +} + +/// Turn a math node into TeX math code. +trait Texify { + /// Perform the conversion. + fn texify(&self) -> EcoString; +} + +impl Texify for MathNode { + fn texify(&self) -> EcoString { + match self { + Self::Space => "".into(), + Self::Linebreak => r"\\".into(), + Self::Atom(atom) => atom.chars().map(escape_char).collect(), + Self::Script(script) => script.texify(), + Self::Frac(frac) => frac.texify(), + Self::Align(_) => "".into(), + Self::Row(row, _) => row.iter().map(Texify::texify).collect(), + } + } +} + +#[rustfmt::skip] +fn escape_char(c: char) -> EcoString { + match c { + '{' | '}' | '%' | '&' | '$' | '#' => format_eco!(" \\{c} "), + 'a' ..= 'z' | 'A' ..= 'Z' | '0' ..= '9' | 'Α' ..= 'Ω' | 'α' ..= 'ω' | + '*' | '+' | '-' | '[' | '(' | ']' | ')' | '?' | '!' | '=' | '<' | '>' | + ':' | ',' | ';' | '|' | '/' | '@' | '.' | '"' => c.into(), + c => unicode_math::SYMBOLS + .iter() + .find(|sym| sym.codepoint == c) + .map(|sym| format_eco!("\\{} ", sym.name)) + .unwrap_or_default(), + } +} diff --git a/src/library/math/rex.rs b/src/library/math/rex.rs deleted file mode 100644 index 96e8e4380..000000000 --- a/src/library/math/rex.rs +++ /dev/null @@ -1,157 +0,0 @@ -use rex::error::{Error, LayoutError}; -use rex::font::FontContext; -use rex::layout::{LayoutSettings, Style}; -use rex::parser::color::RGBA; -use rex::render::{Backend, Cursor, Renderer}; - -use crate::font::Font; -use crate::library::prelude::*; -use crate::library::text::{variant, FontFamily, Lang, TextNode}; - -/// A layout node that renders with ReX. -#[derive(Debug, Hash)] -pub struct RexNode { - /// The TeX formula. - pub tex: Spanned, - /// Whether the formula is display-level. - pub display: bool, - /// The font family. - pub family: FontFamily, -} - -impl Layout for RexNode { - fn layout( - &self, - world: Tracked, - _: &Regions, - styles: StyleChain, - ) -> SourceResult> { - // Load the font. - let span = self.tex.span; - let font = world - .book() - .select(self.family.as_str(), variant(styles)) - .and_then(|id| world.font(id)) - .ok_or("failed to find math font") - .at(span)?; - - // Prepare the font context. - let ctx = font - .math() - .map(|math| FontContext::new(font.ttf(), math)) - .ok_or("font is not suitable for math") - .at(span)?; - - // Layout the formula. - let em = styles.get(TextNode::SIZE); - let style = if self.display { Style::Display } else { Style::Text }; - let settings = LayoutSettings::new(&ctx, em.to_pt(), style); - let renderer = Renderer::new(); - let layout = renderer - .layout(&self.tex.v, settings) - .map_err(|err| match err { - Error::Parse(err) => err.to_string(), - Error::Layout(LayoutError::Font(err)) => err.to_string(), - }) - .at(span)?; - - // Determine the metrics. - let (x0, y0, x1, y1) = renderer.size(&layout); - let width = Length::pt(x1 - x0); - let mut top = Length::pt(y1); - let mut bottom = Length::pt(-y0); - if !self.display { - let metrics = font.metrics(); - top = styles.get(TextNode::TOP_EDGE).resolve(styles, metrics); - bottom = -styles.get(TextNode::BOTTOM_EDGE).resolve(styles, metrics); - }; - - // Prepare a frame rendering backend. - let size = Size::new(width, top + bottom); - let mut backend = FrameBackend { - frame: { - let mut frame = Frame::new(size); - frame.set_baseline(top); - frame.apply_role(Role::Formula); - frame - }, - baseline: top, - font: font.clone(), - fill: styles.get(TextNode::FILL), - lang: styles.get(TextNode::LANG), - colors: vec![], - }; - - // Render into the frame. - renderer.render(&layout, &mut backend); - - Ok(vec![backend.frame]) - } -} - -/// A ReX rendering backend that renders into a frame. -struct FrameBackend { - frame: Frame, - baseline: Length, - font: Font, - fill: Paint, - lang: Lang, - colors: Vec, -} - -impl FrameBackend { - /// The currently active fill paint. - fn fill(&self) -> Paint { - self.colors - .last() - .map(|&RGBA(r, g, b, a)| RgbaColor::new(r, g, b, a).into()) - .unwrap_or(self.fill) - } - - /// Convert a cursor to a point. - fn transform(&self, cursor: Cursor) -> Point { - Point::new(Length::pt(cursor.x), self.baseline + Length::pt(cursor.y)) - } -} - -impl Backend for FrameBackend { - fn symbol(&mut self, pos: Cursor, gid: u16, scale: f64) { - self.frame.push( - self.transform(pos), - Element::Text(Text { - font: self.font.clone(), - size: Length::pt(scale), - fill: self.fill(), - lang: self.lang, - glyphs: vec![Glyph { - id: gid, - x_advance: Em::new(0.0), - x_offset: Em::new(0.0), - c: ' ', - }], - }), - ); - } - - fn rule(&mut self, pos: Cursor, width: f64, height: f64) { - self.frame.push( - self.transform(pos), - Element::Shape(Shape { - geometry: Geometry::Rect(Size::new( - Length::pt(width), - Length::pt(height), - )), - fill: Some(self.fill()), - stroke: None, - }), - ); - } - - fn begin_color(&mut self, color: RGBA) { - self.colors.push(color); - } - - fn end_color(&mut self) { - self.colors.pop(); - } -} diff --git a/src/library/math/script.rs b/src/library/math/script.rs new file mode 100644 index 000000000..09f52164f --- /dev/null +++ b/src/library/math/script.rs @@ -0,0 +1,31 @@ +use std::fmt::Write; + +use super::*; +use crate::library::prelude::*; + +/// A sub- and/or superscript in a mathematical formula. +#[derive(Debug, Hash)] +pub struct ScriptNode { + /// The base. + pub base: MathNode, + /// The subscript. + pub sub: Option, + /// The superscript. + pub sup: Option, +} + +impl Texify for ScriptNode { + fn texify(&self) -> EcoString { + let mut tex = self.base.texify(); + + if let Some(sub) = &self.sub { + write!(tex, "_{{{}}}", sub.texify()).unwrap(); + } + + if let Some(sup) = &self.sup { + write!(tex, "^{{{}}}", sup.texify()).unwrap(); + } + + tex + } +} diff --git a/src/library/mod.rs b/src/library/mod.rs index e7617bc0c..b42ec0713 100644 --- a/src/library/mod.rs +++ b/src/library/mod.rs @@ -73,6 +73,11 @@ pub fn new() -> Scope { // Math. std.def_node::("math"); + std.define("sum", "∑"); + std.define("in", "∈"); + std.define("arrow", "→"); + std.define("NN", "ℕ"); + std.define("RR", "ℝ"); // Utility. std.def_fn("type", utility::type_); diff --git a/src/parse/mod.rs b/src/parse/mod.rs index ed8bc5ced..7eb7343b6 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -11,7 +11,7 @@ pub use tokens::*; use std::collections::HashSet; -use crate::syntax::ast::{Associativity, BinOp, UnOp}; +use crate::syntax::ast::{Assoc, BinOp, UnOp}; use crate::syntax::{NodeKind, SpanPos, SyntaxNode}; use crate::util::EcoString; @@ -22,11 +22,22 @@ pub fn parse(text: &str) -> SyntaxNode { p.finish().into_iter().next().unwrap() } +/// Parse math directly, only used for syntax highlighting. +pub fn parse_math(text: &str) -> SyntaxNode { + let mut p = Parser::new(text, TokenMode::Math); + p.perform(NodeKind::Math, |p| { + while !p.eof() { + math_node(p); + } + }); + p.finish().into_iter().next().unwrap() +} + /// Parse code directly, only used for syntax highlighting. -pub fn parse_code(text: &str) -> Vec { +pub fn parse_code(text: &str) -> SyntaxNode { let mut p = Parser::new(text, TokenMode::Code); - code(&mut p); - p.finish() + p.perform(NodeKind::CodeBlock, code); + p.finish().into_iter().next().unwrap() } /// Reparse a code block. @@ -240,20 +251,20 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { // Text and markup. NodeKind::Text(_) | NodeKind::Linebreak { .. } - | NodeKind::NonBreakingSpace - | NodeKind::Shy - | NodeKind::EnDash - | NodeKind::EmDash - | NodeKind::Ellipsis + | NodeKind::Tilde + | NodeKind::HyphQuest + | NodeKind::Hyph2 + | NodeKind::Hyph3 + | NodeKind::Dot3 | NodeKind::Quote { .. } | NodeKind::Escape(_) | NodeKind::Link(_) | NodeKind::Raw(_) - | NodeKind::Math(_) | NodeKind::Label(_) - | NodeKind::Ref(_) => { - p.eat(); - } + | NodeKind::Ref(_) => p.eat(), + + // Math. + NodeKind::Dollar => math(p), // Strong, emph, heading. NodeKind::Star => strong(p), @@ -405,6 +416,111 @@ fn markup_expr(p: &mut Parser) { p.end_group(); } +/// Parse math. +fn math(p: &mut Parser) { + p.perform(NodeKind::Math, |p| { + p.start_group(Group::Math); + while !p.eof() { + math_node(p); + } + p.end_group(); + }); +} + +/// Parse a math node. +fn math_node(p: &mut Parser) { + math_node_prec(p, 0, None) +} + +/// Parse a math node with operators having at least the minimum precedence. +fn math_node_prec(p: &mut Parser, min_prec: usize, stop: Option) { + let marker = p.marker(); + math_primary(p); + + loop { + let (kind, mut prec, assoc, stop) = match p.peek() { + v if v == stop.as_ref() => break, + Some(NodeKind::Underscore) => { + (NodeKind::Script, 2, Assoc::Right, Some(NodeKind::Hat)) + } + Some(NodeKind::Hat) => ( + NodeKind::Script, + 2, + Assoc::Right, + Some(NodeKind::Underscore), + ), + Some(NodeKind::Slash) => (NodeKind::Frac, 1, Assoc::Left, None), + _ => break, + }; + + if prec < min_prec { + break; + } + + match assoc { + Assoc::Left => prec += 1, + Assoc::Right => {} + } + + p.eat(); + math_node_prec(p, prec, stop); + + // Allow up to two different scripts. We do not risk encountering the + // previous script kind again here due to right-associativity. + if p.eat_if(NodeKind::Underscore) || p.eat_if(NodeKind::Hat) { + math_node_prec(p, prec, None); + } + + marker.end(p, kind); + } +} + +/// Parse a primary math node. +fn math_primary(p: &mut Parser) { + let token = match p.peek() { + Some(t) => t, + None => return, + }; + + match token { + // Spaces, atoms and expressions. + NodeKind::Space { .. } + | NodeKind::Linebreak + | NodeKind::Escape(_) + | NodeKind::Atom(_) + | NodeKind::Ident(_) => p.eat(), + + // Groups. + NodeKind::LeftParen => group(p, Group::Paren), + NodeKind::LeftBracket => group(p, Group::Bracket), + NodeKind::LeftBrace => group(p, Group::Brace), + + // Alignment indactor. + NodeKind::Amp => align(p), + + _ => p.unexpected(), + } +} + +/// Parse grouped math. +fn group(p: &mut Parser, group: Group) { + p.perform(NodeKind::Math, |p| { + p.start_group(group); + while !p.eof() { + math_node(p); + } + p.end_group(); + }) +} + +/// Parse an alignment indicator. +fn align(p: &mut Parser) { + p.perform(NodeKind::Align, |p| { + p.assert(NodeKind::Amp); + while p.eat_if(NodeKind::Amp) {} + }) +} + /// Parse an expression. fn expr(p: &mut Parser) -> ParseResult { expr_prec(p, false, 0) @@ -434,7 +550,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { loop { // Parenthesis or bracket means this is a function call. if let Some(NodeKind::LeftParen | NodeKind::LeftBracket) = p.peek_direct() { - marker.perform(p, NodeKind::FuncCall, |p| args(p))?; + marker.perform(p, NodeKind::FuncCall, args)?; continue; } @@ -446,7 +562,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { if p.eat_if(NodeKind::Dot) { ident(p)?; if let Some(NodeKind::LeftParen | NodeKind::LeftBracket) = p.peek_direct() { - marker.perform(p, NodeKind::MethodCall, |p| args(p))?; + marker.perform(p, NodeKind::MethodCall, args)?; } else { marker.end(p, NodeKind::FieldAccess); } @@ -474,9 +590,9 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { p.eat(); - match op.associativity() { - Associativity::Left => prec += 1, - Associativity::Right => {} + match op.assoc() { + Assoc::Left => prec += 1, + Assoc::Right => {} } marker.perform(p, NodeKind::BinaryExpr, |p| expr_prec(p, atomic, prec))?; diff --git a/src/parse/parser.rs b/src/parse/parser.rs index f8ea96143..12dd324bd 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -92,14 +92,14 @@ impl<'s> Parser<'s> { let until = self.trivia_start(); let mut children = mem::replace(&mut self.children, prev); - if self.tokens.mode() == TokenMode::Code { + if self.tokens.mode() == TokenMode::Markup { + self.children.push(InnerNode::with_children(kind, children).into()); + } else { // Trailing trivia should not be wrapped into the new node. let idx = self.children.len(); self.children.push(SyntaxNode::default()); self.children.extend(children.drain(until.0 ..)); self.children[idx] = InnerNode::with_children(kind, children).into(); - } else { - self.children.push(InnerNode::with_children(kind, children).into()); } output @@ -122,7 +122,7 @@ impl<'s> Parser<'s> { self.prev_end = self.tokens.cursor(); self.bump(); - if self.tokens.mode() == TokenMode::Code { + if self.tokens.mode() != TokenMode::Markup { // Skip whitespace and comments. while self.current.as_ref().map_or(false, |x| self.is_trivia(x)) { self.bump(); @@ -232,8 +232,17 @@ impl<'s> Parser<'s> { pub fn start_group(&mut self, kind: Group) { self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() }); self.tokens.set_mode(match kind { - Group::Bracket | Group::Strong | Group::Emph => TokenMode::Markup, - Group::Brace | Group::Paren | Group::Expr | Group::Imports => TokenMode::Code, + Group::Strong | Group::Emph => TokenMode::Markup, + Group::Bracket => match self.tokens.mode() { + TokenMode::Math => TokenMode::Math, + _ => TokenMode::Markup, + }, + Group::Brace | Group::Paren => match self.tokens.mode() { + TokenMode::Math => TokenMode::Math, + _ => TokenMode::Code, + }, + Group::Math => TokenMode::Math, + Group::Expr | Group::Imports => TokenMode::Code, }); match kind { @@ -242,6 +251,7 @@ impl<'s> Parser<'s> { Group::Paren => self.assert(NodeKind::LeftParen), Group::Strong => self.assert(NodeKind::Star), Group::Emph => self.assert(NodeKind::Underscore), + Group::Math => self.assert(NodeKind::Dollar), Group::Expr => self.repeek(), Group::Imports => self.repeek(), } @@ -260,11 +270,12 @@ impl<'s> Parser<'s> { // Eat the end delimiter if there is one. if let Some((end, required)) = match group.kind { - Group::Paren => Some((NodeKind::RightParen, true)), - Group::Bracket => Some((NodeKind::RightBracket, true)), Group::Brace => Some((NodeKind::RightBrace, true)), + Group::Bracket => Some((NodeKind::RightBracket, true)), + Group::Paren => Some((NodeKind::RightParen, true)), Group::Strong => Some((NodeKind::Star, true)), Group::Emph => Some((NodeKind::Underscore, true)), + Group::Math => Some((NodeKind::Dollar, true)), Group::Expr => Some((NodeKind::Semicolon, false)), Group::Imports => None, } { @@ -290,7 +301,7 @@ impl<'s> Parser<'s> { // Rescan the peeked token if the mode changed. if rescan { let mut target = self.prev_end(); - if group_mode == TokenMode::Code { + if group_mode != TokenMode::Markup { let start = self.trivia_start().0; target = self.current_start - self.children[start ..].iter().map(SyntaxNode::len).sum::(); @@ -330,6 +341,7 @@ impl<'s> Parser<'s> { Some(NodeKind::RightParen) => self.inside(Group::Paren), Some(NodeKind::Star) => self.inside(Group::Strong), Some(NodeKind::Underscore) => self.inside(Group::Emph), + Some(NodeKind::Dollar) => self.inside(Group::Math), Some(NodeKind::Semicolon) => self.inside(Group::Expr), Some(NodeKind::From) => self.inside(Group::Imports), Some(NodeKind::Space { newlines }) => self.space_ends_group(*newlines), @@ -472,7 +484,7 @@ impl Marker { } // Don't expose trivia in code. - if p.tokens.mode() == TokenMode::Code && child.kind().is_trivia() { + if p.tokens.mode() != TokenMode::Markup && child.kind().is_trivia() { continue; } @@ -515,6 +527,8 @@ pub enum Group { Strong, /// A group surrounded with underscore: `_..._`. Emph, + /// A group surrounded by dollar signs: `$...$`. + Math, /// A group ended by a semicolon or a line break: `;`, `\n`. Expr, /// A group for import items, ended by a semicolon, line break or `from`. diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index f6d4b0e86..d495afa01 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -5,7 +5,7 @@ use unscanny::Scanner; use super::resolve::{resolve_hex, resolve_raw, resolve_string}; use crate::geom::{AngleUnit, LengthUnit}; -use crate::syntax::ast::{MathNode, RawNode, Unit}; +use crate::syntax::ast::{RawNode, Unit}; use crate::syntax::{NodeKind, SpanPos}; use crate::util::EcoString; @@ -27,6 +27,8 @@ pub struct Tokens<'s> { pub enum TokenMode { /// Text and markup. Markup, + /// Math atoms, operators, etc. + Math, /// Keywords, literals and operators. Code, } @@ -103,23 +105,16 @@ impl<'s> Iterator for Tokens<'s> { let start = self.s.cursor(); let c = self.s.eat()?; Some(match c { - // Comments. + // Trivia. '/' if self.s.eat_if('/') => self.line_comment(), '/' if self.s.eat_if('*') => self.block_comment(), '*' if self.s.eat_if('/') => NodeKind::Unknown("*/".into()), - - // Blocks. - '{' => NodeKind::LeftBrace, - '}' => NodeKind::RightBrace, - '[' => NodeKind::LeftBracket, - ']' => NodeKind::RightBracket, - - // Whitespace. c if c.is_whitespace() => self.whitespace(c), // Other things. _ => match self.mode { TokenMode::Markup => self.markup(start, c), + TokenMode::Math => self.math(start, c), TokenMode::Code => self.code(start, c), }, }) @@ -195,16 +190,23 @@ impl<'s> Tokens<'s> { #[inline] fn markup(&mut self, start: usize, c: char) -> NodeKind { match c { + // Blocks. + '{' => NodeKind::LeftBrace, + '}' => NodeKind::RightBrace, + '[' => NodeKind::LeftBracket, + ']' => NodeKind::RightBracket, + // Escape sequences. '\\' => self.backslash(), // Single-char things. - '~' => NodeKind::NonBreakingSpace, - '.' if self.s.eat_if("..") => NodeKind::Ellipsis, + '~' => NodeKind::Tilde, + '.' if self.s.eat_if("..") => NodeKind::Dot3, '\'' => NodeKind::Quote { double: false }, '"' => NodeKind::Quote { double: true }, '*' if !self.in_word() => NodeKind::Star, '_' if !self.in_word() => NodeKind::Underscore, + '$' => NodeKind::Dollar, '=' => NodeKind::Eq, '+' => NodeKind::Plus, '/' => NodeKind::Slash, @@ -217,7 +219,6 @@ impl<'s> Tokens<'s> { self.link(start) } '`' => self.raw(), - '$' => self.math(), c if c.is_ascii_digit() => self.numbering(start), '<' => self.label(), '@' => self.reference(start), @@ -313,12 +314,12 @@ impl<'s> Tokens<'s> { fn hyph(&mut self) -> NodeKind { if self.s.eat_if('-') { if self.s.eat_if('-') { - NodeKind::EmDash + NodeKind::Hyph3 } else { - NodeKind::EnDash + NodeKind::Hyph2 } } else if self.s.eat_if('?') { - NodeKind::Shy + NodeKind::HyphQuest } else { NodeKind::Minus } @@ -395,29 +396,6 @@ impl<'s> Tokens<'s> { } } - fn math(&mut self) -> NodeKind { - let mut escaped = false; - let formula = self.s.eat_until(|c| { - if c == '$' && !escaped { - true - } else { - escaped = c == '\\' && !escaped; - false - } - }); - - let display = formula.len() >= 2 - && formula.starts_with(char::is_whitespace) - && formula.ends_with(char::is_whitespace); - - if self.s.eat_if('$') { - NodeKind::Math(Arc::new(MathNode { formula: formula.into(), display })) - } else { - self.terminated = false; - NodeKind::Error(SpanPos::End, "expected dollar sign".into()) - } - } - fn numbering(&mut self, start: usize) -> NodeKind { self.s.eat_while(char::is_ascii_digit); let read = self.s.from(start); @@ -453,8 +431,51 @@ impl<'s> Tokens<'s> { } } + fn math(&mut self, start: usize, c: char) -> NodeKind { + match c { + // Escape sequences. + '\\' => self.backslash(), + + // Single-char things. + '_' => NodeKind::Underscore, + '^' => NodeKind::Hat, + '/' => NodeKind::Slash, + '&' => NodeKind::Amp, + '$' => NodeKind::Dollar, + + // Brackets. + '{' => NodeKind::LeftBrace, + '}' => NodeKind::RightBrace, + '[' => NodeKind::LeftBracket, + ']' => NodeKind::RightBracket, + '(' => NodeKind::LeftParen, + ')' => NodeKind::RightParen, + + // Identifiers. + c if is_math_id_start(c) && self.s.at(is_math_id_continue) => { + self.s.eat_while(is_math_id_continue); + NodeKind::Ident(self.s.from(start).into()) + } + + // Numbers. + c if c.is_numeric() => { + self.s.eat_while(char::is_numeric); + NodeKind::Atom(self.s.from(start).into()) + } + + // Other math atoms. + c => NodeKind::Atom(c.into()), + } + } + fn code(&mut self, start: usize, c: char) -> NodeKind { match c { + // Blocks. + '{' => NodeKind::LeftBrace, + '}' => NodeKind::RightBrace, + '[' => NodeKind::LeftBracket, + ']' => NodeKind::RightBracket, + // Parentheses. '(' => NodeKind::LeftParen, ')' => NodeKind::RightParen, @@ -673,6 +694,18 @@ fn is_id_continue(c: char) -> bool { c.is_xid_continue() || c == '_' || c == '-' } +/// Whether a character can start an identifier in math. +#[inline] +fn is_math_id_start(c: char) -> bool { + c.is_xid_start() +} + +/// Whether a character can continue an identifier in math. +#[inline] +fn is_math_id_continue(c: char) -> bool { + c.is_xid_continue() && c != '_' +} + #[cfg(test)] #[allow(non_snake_case)] mod tests { @@ -696,10 +729,6 @@ mod tests { })) } - fn Math(formula: &str, display: bool) -> NodeKind { - NodeKind::Math(Arc::new(MathNode { formula: formula.into(), display })) - } - fn Str(string: &str) -> NodeKind { NodeKind::Str(string.into()) } @@ -770,7 +799,6 @@ mod tests { ('/', None, "//", LineComment), ('/', None, "/**/", BlockComment), ('/', Some(Markup), "*", Star), - ('/', Some(Markup), "$ $", Math(" ", false)), ('/', Some(Markup), r"\\", Escape('\\')), ('/', Some(Markup), "#let", Let), ('/', Some(Code), "(", LeftParen), @@ -853,7 +881,7 @@ mod tests { // Test text ends. t!(Markup[""]: "hello " => Text("hello"), Space(0)); - t!(Markup[""]: "hello~" => Text("hello"), NonBreakingSpace); + t!(Markup[""]: "hello~" => Text("hello"), Tilde); } #[test] @@ -899,9 +927,9 @@ mod tests { t!(Markup[""]: "===" => Eq, Eq, Eq); t!(Markup["a1/"]: "= " => Eq, Space(0)); t!(Markup[" "]: r"\" => Linebreak); - t!(Markup: "~" => NonBreakingSpace); - t!(Markup["a1/"]: "-?" => Shy); - t!(Markup["a "]: r"a--" => Text("a"), EnDash); + t!(Markup: "~" => Tilde); + t!(Markup["a1/"]: "-?" => HyphQuest); + t!(Markup["a "]: r"a--" => Text("a"), Hyph2); t!(Markup["a1/"]: "- " => Minus, Space(0)); t!(Markup[" "]: "+" => Plus); t!(Markup[" "]: "1." => EnumNumbering(1)); @@ -998,24 +1026,6 @@ mod tests { t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), false), Raw("", None, false)); } - #[test] - fn test_tokenize_math_formulas() { - // Test basic formula. - t!(Markup: "$$" => Math("", false)); - t!(Markup: "$x$" => Math("x", false)); - t!(Markup: r"$\\$" => Math(r"\\", false)); - t!(Markup: r"$[\\]$" => Math(r"[\\]", false)); - t!(Markup: "$ x + y $" => Math(" x + y ", true)); - - // Test unterminated. - t!(Markup[""]: "$x" => Error(End, "expected dollar sign")); - t!(Markup[""]: "$[x]\n" => Error(End, "expected dollar sign")); - - // Test escape sequences. - t!(Markup: r"$\$x$" => Math(r"\$x", false)); - t!(Markup: r"$\ \$ $" => Math(r"\ \$ ", false)); - } - #[test] fn test_tokenize_idents() { // Test valid identifiers. diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 8d3696a89..6a016e795 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -5,7 +5,7 @@ use std::num::NonZeroUsize; use std::ops::Deref; -use super::{NodeData, NodeKind, Span, Spanned, SyntaxNode}; +use super::{NodeData, NodeKind, Span, SyntaxNode}; use crate::geom::{AngleUnit, LengthUnit}; use crate::util::EcoString; @@ -60,34 +60,7 @@ node! { impl Markup { /// The markup nodes. pub fn nodes(&self) -> impl Iterator + '_ { - self.0.children().filter_map(|node| match node.kind() { - NodeKind::Space { newlines: (2 ..) } => Some(MarkupNode::Parbreak), - NodeKind::Space { .. } => Some(MarkupNode::Space), - NodeKind::Linebreak => Some(MarkupNode::Linebreak), - NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())), - NodeKind::Escape(c) => Some(MarkupNode::Text((*c).into())), - NodeKind::NonBreakingSpace => Some(MarkupNode::Text('\u{00A0}'.into())), - NodeKind::Shy => Some(MarkupNode::Text('\u{00AD}'.into())), - NodeKind::EnDash => Some(MarkupNode::Text('\u{2013}'.into())), - NodeKind::EmDash => Some(MarkupNode::Text('\u{2014}'.into())), - NodeKind::Ellipsis => Some(MarkupNode::Text('\u{2026}'.into())), - &NodeKind::Quote { double } => Some(MarkupNode::Quote { double }), - NodeKind::Strong => node.cast().map(MarkupNode::Strong), - NodeKind::Emph => node.cast().map(MarkupNode::Emph), - NodeKind::Link(url) => Some(MarkupNode::Link(url.clone())), - NodeKind::Raw(raw) => Some(MarkupNode::Raw(raw.as_ref().clone())), - NodeKind::Math(math) => Some(MarkupNode::Math(Spanned::new( - math.as_ref().clone(), - node.span(), - ))), - NodeKind::Heading => node.cast().map(MarkupNode::Heading), - NodeKind::List => node.cast().map(MarkupNode::List), - NodeKind::Enum => node.cast().map(MarkupNode::Enum), - NodeKind::Desc => node.cast().map(MarkupNode::Desc), - NodeKind::Label(v) => Some(MarkupNode::Label(v.clone())), - NodeKind::Ref(v) => Some(MarkupNode::Ref(v.clone())), - _ => node.cast().map(MarkupNode::Expr), - }) + self.0.children().filter_map(SyntaxNode::cast) } } @@ -113,7 +86,7 @@ pub enum MarkupNode { /// A raw block with optional syntax highlighting: `` `...` ``. Raw(RawNode), /// A math formula: `$a^2 = b^2 + c^2$`. - Math(Spanned), + Math(Math), /// A section heading: `= Introduction`. Heading(HeadingNode), /// An item in an unordered list: `- ...`. @@ -130,6 +103,40 @@ pub enum MarkupNode { Expr(Expr), } +impl TypedNode for MarkupNode { + fn from_untyped(node: &SyntaxNode) -> Option { + match node.kind() { + NodeKind::Space { newlines: (2 ..) } => Some(Self::Parbreak), + NodeKind::Space { .. } => Some(Self::Space), + NodeKind::Linebreak => Some(Self::Linebreak), + NodeKind::Text(s) => Some(Self::Text(s.clone())), + NodeKind::Escape(c) => Some(Self::Text((*c).into())), + NodeKind::Tilde => Some(Self::Text('\u{00A0}'.into())), + NodeKind::HyphQuest => Some(Self::Text('\u{00AD}'.into())), + NodeKind::Hyph2 => Some(Self::Text('\u{2013}'.into())), + NodeKind::Hyph3 => Some(Self::Text('\u{2014}'.into())), + NodeKind::Dot3 => Some(Self::Text('\u{2026}'.into())), + NodeKind::Quote { double } => Some(Self::Quote { double: *double }), + NodeKind::Strong => node.cast().map(Self::Strong), + NodeKind::Emph => node.cast().map(Self::Emph), + NodeKind::Link(url) => Some(Self::Link(url.clone())), + NodeKind::Raw(raw) => Some(Self::Raw(raw.as_ref().clone())), + NodeKind::Math => node.cast().map(Self::Math), + NodeKind::Heading => node.cast().map(Self::Heading), + NodeKind::List => node.cast().map(Self::List), + NodeKind::Enum => node.cast().map(Self::Enum), + NodeKind::Desc => node.cast().map(Self::Desc), + NodeKind::Label(v) => Some(Self::Label(v.clone())), + NodeKind::Ref(v) => Some(Self::Ref(v.clone())), + _ => node.cast().map(Self::Expr), + } + } + + fn as_untyped(&self) -> &SyntaxNode { + unimplemented!("MarkupNode::as_untyped") + } +} + node! { /// Strong content: `*Strong*`. StrongNode: Strong @@ -169,14 +176,122 @@ pub struct RawNode { pub block: bool, } -/// A math formula: `$x$`, `$[x^2]$`. +node! { + /// A math formula: `$x$`, `$ x^2 $`. + Math: NodeKind::Math { .. } +} + +impl Math { + /// The math nodes. + pub fn nodes(&self) -> impl Iterator + '_ { + self.0.children().filter_map(SyntaxNode::cast) + } +} + +/// A single piece of a math formula. #[derive(Debug, Clone, PartialEq, Hash)] -pub struct MathNode { - /// The formula between the dollars / brackets. - pub formula: EcoString, - /// Whether the formula is display-level, that is, it contains whitespace - /// after the starting dollar sign and before the ending dollar sign. - pub display: bool, +pub enum MathNode { + /// Whitespace. + Space, + /// A forced line break. + Linebreak, + /// An atom: `x`, `+`, `12`. + Atom(EcoString), + /// A base with an optional sub- and superscript: `a_1^2`. + Script(ScriptNode), + /// A fraction: `x/2`. + Frac(FracNode), + /// A math alignment indicator: `&`, `&&`. + Align(AlignNode), + /// Grouped mathematical material. + Group(Math), + /// An expression. + Expr(Expr), +} + +impl TypedNode for MathNode { + fn from_untyped(node: &SyntaxNode) -> Option { + match node.kind() { + NodeKind::Space { .. } => Some(Self::Space), + NodeKind::LeftBrace => Some(Self::Atom('{'.into())), + NodeKind::RightBrace => Some(Self::Atom('}'.into())), + NodeKind::LeftBracket => Some(Self::Atom('['.into())), + NodeKind::RightBracket => Some(Self::Atom(']'.into())), + NodeKind::LeftParen => Some(Self::Atom('('.into())), + NodeKind::RightParen => Some(Self::Atom(')'.into())), + NodeKind::Linebreak => Some(Self::Linebreak), + NodeKind::Escape(c) => Some(Self::Atom((*c).into())), + NodeKind::Atom(atom) => Some(Self::Atom(atom.clone())), + NodeKind::Script => node.cast().map(Self::Script), + NodeKind::Frac => node.cast().map(Self::Frac), + NodeKind::Align => node.cast().map(Self::Align), + NodeKind::Math => node.cast().map(Self::Group), + _ => node.cast().map(Self::Expr), + } + } + + fn as_untyped(&self) -> &SyntaxNode { + unimplemented!("MathNode::as_untyped") + } +} + +node! { + /// A base with an optional sub- and superscript in a formula: `a_1^2`. + ScriptNode: Script +} + +impl ScriptNode { + /// The base of the script. + pub fn base(&self) -> MathNode { + self.0.cast_first_child().expect("subscript is missing base") + } + + /// The subscript. + pub fn sub(&self) -> Option { + self.0 + .children() + .skip_while(|node| !matches!(node.kind(), NodeKind::Underscore)) + .nth(1) + .map(|node| node.cast().expect("script node has invalid subscript")) + } + + /// The superscript. + pub fn sup(&self) -> Option { + self.0 + .children() + .skip_while(|node| !matches!(node.kind(), NodeKind::Hat)) + .nth(1) + .map(|node| node.cast().expect("script node has invalid superscript")) + } +} + +node! { + /// A fraction in a formula: `x/2` + FracNode: Frac +} + +impl FracNode { + /// The numerator. + pub fn num(&self) -> MathNode { + self.0.cast_first_child().expect("fraction is missing numerator") + } + + /// The denominator. + pub fn denom(&self) -> MathNode { + self.0.cast_last_child().expect("fraction is missing denominator") + } +} + +node! { + /// A math alignment indicator: `&`, `&&`. + AlignNode: Align +} + +impl AlignNode { + /// The number of ampersands. + pub fn count(&self) -> usize { + self.0.children().filter(|n| n.kind() == &NodeKind::Amp).count() + } } node! { @@ -799,27 +914,27 @@ impl BinOp { } /// The associativity of this operator. - pub fn associativity(self) -> Associativity { + pub fn assoc(self) -> Assoc { match self { - Self::Add => Associativity::Left, - Self::Sub => Associativity::Left, - Self::Mul => Associativity::Left, - Self::Div => Associativity::Left, - Self::And => Associativity::Left, - Self::Or => Associativity::Left, - Self::Eq => Associativity::Left, - Self::Neq => Associativity::Left, - Self::Lt => Associativity::Left, - Self::Leq => Associativity::Left, - Self::Gt => Associativity::Left, - Self::Geq => Associativity::Left, - Self::In => Associativity::Left, - Self::NotIn => Associativity::Left, - Self::Assign => Associativity::Right, - Self::AddAssign => Associativity::Right, - Self::SubAssign => Associativity::Right, - Self::MulAssign => Associativity::Right, - Self::DivAssign => Associativity::Right, + Self::Add => Assoc::Left, + Self::Sub => Assoc::Left, + Self::Mul => Assoc::Left, + Self::Div => Assoc::Left, + Self::And => Assoc::Left, + Self::Or => Assoc::Left, + Self::Eq => Assoc::Left, + Self::Neq => Assoc::Left, + Self::Lt => Assoc::Left, + Self::Leq => Assoc::Left, + Self::Gt => Assoc::Left, + Self::Geq => Assoc::Left, + Self::In => Assoc::Left, + Self::NotIn => Assoc::Left, + Self::Assign => Assoc::Right, + Self::AddAssign => Assoc::Right, + Self::SubAssign => Assoc::Right, + Self::MulAssign => Assoc::Right, + Self::DivAssign => Assoc::Right, } } @@ -851,7 +966,7 @@ impl BinOp { /// The associativity of a binary operator. #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub enum Associativity { +pub enum Assoc { /// Left-associative: `a + b + c` is equivalent to `(a + b) + c`. Left, /// Right-associative: `a = b = c` is equivalent to `a = (b = c)`. diff --git a/src/syntax/highlight.rs b/src/syntax/highlight.rs index 4a4534808..e36405623 100644 --- a/src/syntax/highlight.rs +++ b/src/syntax/highlight.rs @@ -1,11 +1,10 @@ use std::fmt::Write; use std::ops::Range; -use std::sync::Arc; use syntect::highlighting::{Color, FontStyle, Highlighter, Style, Theme}; use syntect::parsing::Scope; -use super::{InnerNode, NodeKind, SyntaxNode}; +use super::{NodeKind, SyntaxNode}; use crate::parse::TokenMode; /// Provide highlighting categories for the descendants of a node that fall into @@ -47,13 +46,8 @@ where { let root = match mode { TokenMode::Markup => crate::parse::parse(text), - TokenMode::Code => { - let children = crate::parse::parse_code(text); - SyntaxNode::Inner(Arc::new(InnerNode::with_children( - NodeKind::CodeBlock, - children, - ))) - } + TokenMode::Math => crate::parse::parse_math(text), + TokenMode::Code => crate::parse::parse_code(text), }; let highlighter = Highlighter::new(&theme); @@ -169,8 +163,8 @@ pub enum Category { Math, /// A section heading. Heading, - /// A symbol of a list, enumeration, or description list. - List, + /// A marker of a list, enumeration, or description list. + ListMarker, /// A term in a description list. Term, /// A label. @@ -210,71 +204,50 @@ impl Category { match child.kind() { NodeKind::LineComment => Some(Category::Comment), NodeKind::BlockComment => Some(Category::Comment), + NodeKind::Space { .. } => None, + NodeKind::LeftBrace => Some(Category::Bracket), NodeKind::RightBrace => Some(Category::Bracket), NodeKind::LeftBracket => Some(Category::Bracket), NodeKind::RightBracket => Some(Category::Bracket), NodeKind::LeftParen => Some(Category::Bracket), NodeKind::RightParen => Some(Category::Bracket), - - NodeKind::Markup { .. } => match parent.kind() { - NodeKind::Desc - if parent - .children() - .take_while(|child| child.kind() != &NodeKind::Colon) - .find(|c| matches!(c.kind(), NodeKind::Markup { .. })) - .map_or(false, |ident| std::ptr::eq(ident, child)) => - { - Some(Category::Term) - } - _ => None, - }, - NodeKind::Space { .. } => None, - NodeKind::Linebreak { .. } => Some(Category::Shortcut), - NodeKind::Text(_) => None, - NodeKind::Escape(_) => Some(Category::Escape), - NodeKind::NonBreakingSpace => Some(Category::Shortcut), - NodeKind::Shy => Some(Category::Shortcut), - NodeKind::EnDash => Some(Category::Shortcut), - NodeKind::EmDash => Some(Category::Shortcut), - NodeKind::Ellipsis => Some(Category::Shortcut), - NodeKind::Quote { .. } => None, - NodeKind::Star => match parent.kind() { - NodeKind::Strong => None, - _ => Some(Category::Operator), - }, - NodeKind::Underscore => None, - NodeKind::Strong => Some(Category::Strong), - NodeKind::Emph => Some(Category::Emph), - NodeKind::Link(_) => Some(Category::Link), - NodeKind::Raw(_) => Some(Category::Raw), - NodeKind::Math(_) => Some(Category::Math), - NodeKind::Heading => Some(Category::Heading), - NodeKind::List => None, - NodeKind::Enum => None, - NodeKind::EnumNumbering(_) => Some(Category::List), - NodeKind::Desc => None, - NodeKind::Label(_) => Some(Category::Label), - NodeKind::Ref(_) => Some(Category::Ref), - NodeKind::Comma => Some(Category::Punctuation), NodeKind::Semicolon => Some(Category::Punctuation), NodeKind::Colon => match parent.kind() { NodeKind::Desc => Some(Category::Term), _ => Some(Category::Punctuation), }, + NodeKind::Star => match parent.kind() { + NodeKind::Strong => None, + _ => Some(Category::Operator), + }, + NodeKind::Underscore => match parent.kind() { + NodeKind::Script => Some(Category::Shortcut), + _ => None, + }, + NodeKind::Dollar => Some(Category::Math), + NodeKind::Tilde => Some(Category::Shortcut), + NodeKind::HyphQuest => Some(Category::Shortcut), + NodeKind::Hyph2 => Some(Category::Shortcut), + NodeKind::Hyph3 => Some(Category::Shortcut), + NodeKind::Dot3 => Some(Category::Shortcut), + NodeKind::Quote { .. } => None, NodeKind::Plus => match parent.kind() { - NodeKind::Enum => Some(Category::List), + NodeKind::Enum => Some(Category::ListMarker), _ => Some(Category::Operator), }, NodeKind::Minus => match parent.kind() { - NodeKind::List => Some(Category::List), + NodeKind::List => Some(Category::ListMarker), _ => Some(Category::Operator), }, NodeKind::Slash => match parent.kind() { - NodeKind::Desc => Some(Category::List), + NodeKind::Desc => Some(Category::ListMarker), + NodeKind::Frac => Some(Category::Shortcut), _ => Some(Category::Operator), }, + NodeKind::Hat => Some(Category::Shortcut), + NodeKind::Amp => Some(Category::Shortcut), NodeKind::Dot => Some(Category::Punctuation), NodeKind::Eq => match parent.kind() { NodeKind::Heading => None, @@ -292,6 +265,7 @@ impl Category { NodeKind::SlashEq => Some(Category::Operator), NodeKind::Dots => Some(Category::Operator), NodeKind::Arrow => Some(Category::Operator), + NodeKind::Not => Some(Category::Keyword), NodeKind::And => Some(Category::Keyword), NodeKind::Or => Some(Category::Keyword), @@ -314,8 +288,42 @@ impl Category { NodeKind::From => Some(Category::Keyword), NodeKind::As => Some(Category::Keyword), + NodeKind::Markup { .. } => match parent.kind() { + NodeKind::Desc + if parent + .children() + .take_while(|child| child.kind() != &NodeKind::Colon) + .find(|c| matches!(c.kind(), NodeKind::Markup { .. })) + .map_or(false, |ident| std::ptr::eq(ident, child)) => + { + Some(Category::Term) + } + _ => None, + }, + NodeKind::Linebreak { .. } => Some(Category::Shortcut), + NodeKind::Text(_) => None, + NodeKind::Escape(_) => Some(Category::Escape), + NodeKind::Strong => Some(Category::Strong), + NodeKind::Emph => Some(Category::Emph), + NodeKind::Link(_) => Some(Category::Link), + NodeKind::Raw(_) => Some(Category::Raw), + NodeKind::Math => None, + NodeKind::Heading => Some(Category::Heading), + NodeKind::List => None, + NodeKind::Enum => None, + NodeKind::EnumNumbering(_) => Some(Category::ListMarker), + NodeKind::Desc => None, + NodeKind::Label(_) => Some(Category::Label), + NodeKind::Ref(_) => Some(Category::Ref), + + NodeKind::Atom(_) => None, + NodeKind::Script => None, + NodeKind::Frac => None, + NodeKind::Align => None, + NodeKind::Ident(_) => match parent.kind() { NodeKind::Markup { .. } => Some(Category::Interpolated), + NodeKind::Math => Some(Category::Interpolated), NodeKind::FuncCall => Some(Category::Function), NodeKind::MethodCall if i > 0 => Some(Category::Function), NodeKind::ClosureExpr if i == 0 => Some(Category::Function), @@ -388,7 +396,7 @@ impl Category { Self::Raw => "markup.raw.typst", Self::Math => "string.other.math.typst", Self::Heading => "markup.heading.typst", - Self::List => "markup.list.typst", + Self::ListMarker => "markup.list.typst", Self::Term => "markup.list.term.typst", Self::Label => "entity.name.label.typst", Self::Ref => "markup.other.reference.typst", diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 6c6f690ce..367d00622 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -12,7 +12,7 @@ use std::sync::Arc; pub use highlight::*; pub use span::*; -use self::ast::{MathNode, RawNode, TypedNode, Unit}; +use self::ast::{RawNode, TypedNode, Unit}; use crate::diag::SourceError; use crate::source::SourceId; use crate::util::EcoString; @@ -579,6 +579,12 @@ pub enum NodeKind { /// /// The comment can contain nested block comments. BlockComment, + /// One or more whitespace characters. Single spaces are collapsed into text + /// nodes if they would otherwise be surrounded by text nodes. + /// + /// Also stores how many newlines are contained. + Space { newlines: usize }, + /// A left curly brace, starting a code block: `{`. LeftBrace, /// A right curly brace, terminating a code block: `}`. @@ -593,69 +599,6 @@ pub enum NodeKind { /// A right round parenthesis, terminating a grouped expression, collection, /// argument or parameter list: `)`. RightParen, - - /// Markup of which all lines must have a minimal indentation. - /// - /// Notably, the number does not determine in which column the markup - /// started, but to the right of which column all markup elements must be, - /// so it is zero except for headings and lists. - Markup { min_indent: usize }, - /// One or more whitespace characters. Single spaces are collapsed into text - /// nodes if they would otherwise be surrounded by text nodes. - /// - /// Also stores how many newlines are contained. - Space { newlines: usize }, - /// A forced line break. - Linebreak, - /// Consecutive text without markup. While basic text with just single - /// spaces is collapsed into a single node, certain symbols that could - /// possibly be markup force text into multiple nodes. - Text(EcoString), - /// A slash and the letter "u" followed by a hexadecimal unicode entity - /// enclosed in curly braces: `\u{1F5FA}`. - Escape(char), - /// A non-breaking space: `~`. - NonBreakingSpace, - /// A soft hyphen: `-?`. - Shy, - /// An en-dash: `--`. - EnDash, - /// An em-dash: `---`. - EmDash, - /// An ellipsis: `...`. - Ellipsis, - /// A smart quote: `'` or `"`. - Quote { double: bool }, - /// The strong text toggle, multiplication operator, and wildcard import - /// symbol: `*`. - Star, - /// Toggles emphasized text: `_`. - Underscore, - /// Strong content: `*Strong*`. - Strong, - /// Emphasized content: `_Emphasized_`. - Emph, - /// A hyperlink. - Link(EcoString), - /// A raw block with optional syntax highlighting: `` `...` ``. - Raw(Arc), - /// A math formula: `$x$`, `$[x^2]$`. - Math(Arc), - /// A section heading: `= Introduction`. - Heading, - /// An item in an unordered list: `- ...`. - List, - /// An item in an enumeration (ordered list): `+ ...` or `1. ...`. - Enum, - /// An explicit enumeration numbering: `23.`. - EnumNumbering(usize), - /// An item in a description list: `/ Term: Details. - Desc, - /// A label: `