From f9897479d2a8a865c4033bc44ec9a85fb5000795 Mon Sep 17 00:00:00 2001 From: cAttte <26514199+cAttte@users.noreply.github.com> Date: Thu, 12 Jun 2025 11:09:37 -0300 Subject: [PATCH] Unify `EvalMode` and `LexMode` into `SyntaxMode` (#6432) --- crates/typst-cli/src/query.rs | 6 +-- crates/typst-eval/src/lib.rs | 17 ++++---- crates/typst-library/src/foundations/cast.rs | 17 +++++++- crates/typst-library/src/foundations/mod.rs | 6 +-- .../typst-library/src/model/bibliography.rs | 6 +-- crates/typst-library/src/routines.rs | 15 +------ crates/typst-syntax/src/lexer.rs | 37 ++++++---------- crates/typst-syntax/src/lib.rs | 13 +++++- crates/typst-syntax/src/parser.rs | 43 ++++++++++--------- 9 files changed, 82 insertions(+), 78 deletions(-) diff --git a/crates/typst-cli/src/query.rs b/crates/typst-cli/src/query.rs index 7806e456f..b1a446203 100644 --- a/crates/typst-cli/src/query.rs +++ b/crates/typst-cli/src/query.rs @@ -5,9 +5,9 @@ use typst::diag::{bail, HintedStrResult, StrResult, Warned}; use typst::engine::Sink; use typst::foundations::{Content, IntoValue, LocatableSelector, Scope}; use typst::layout::PagedDocument; -use typst::syntax::Span; +use typst::syntax::{Span, SyntaxMode}; use typst::World; -use typst_eval::{eval_string, EvalMode}; +use typst_eval::eval_string; use crate::args::{QueryCommand, SerializationFormat}; use crate::compile::print_diagnostics; @@ -63,7 +63,7 @@ fn retrieve( Sink::new().track_mut(), &command.selector, Span::detached(), - EvalMode::Code, + SyntaxMode::Code, Scope::default(), ) .map_err(|errors| { diff --git a/crates/typst-eval/src/lib.rs b/crates/typst-eval/src/lib.rs index 586da26be..e4bbe4f0f 100644 --- a/crates/typst-eval/src/lib.rs +++ b/crates/typst-eval/src/lib.rs @@ -18,7 +18,6 @@ pub use self::call::{eval_closure, CapturesVisitor}; pub use self::flow::FlowEvent; pub use self::import::import; pub use self::vm::Vm; -pub use typst_library::routines::EvalMode; use self::access::*; use self::binding::*; @@ -32,7 +31,7 @@ use typst_library::introspection::Introspector; use typst_library::math::EquationElem; use typst_library::routines::Routines; use typst_library::World; -use typst_syntax::{ast, parse, parse_code, parse_math, Source, Span}; +use typst_syntax::{ast, parse, parse_code, parse_math, Source, Span, SyntaxMode}; /// Evaluate a source file and return the resulting module. #[comemo::memoize] @@ -104,13 +103,13 @@ pub fn eval_string( sink: TrackedMut, string: &str, span: Span, - mode: EvalMode, + mode: SyntaxMode, scope: Scope, ) -> SourceResult { let mut root = match mode { - EvalMode::Code => parse_code(string), - EvalMode::Markup => parse(string), - EvalMode::Math => parse_math(string), + SyntaxMode::Code => parse_code(string), + SyntaxMode::Markup => parse(string), + SyntaxMode::Math => parse_math(string), }; root.synthesize(span); @@ -141,11 +140,11 @@ pub fn eval_string( // Evaluate the code. let output = match mode { - EvalMode::Code => root.cast::().unwrap().eval(&mut vm)?, - EvalMode::Markup => { + SyntaxMode::Code => root.cast::().unwrap().eval(&mut vm)?, + SyntaxMode::Markup => { Value::Content(root.cast::().unwrap().eval(&mut vm)?) } - EvalMode::Math => Value::Content( + SyntaxMode::Math => Value::Content( EquationElem::new(root.cast::().unwrap().eval(&mut vm)?) .with_block(false) .pack() diff --git a/crates/typst-library/src/foundations/cast.rs b/crates/typst-library/src/foundations/cast.rs index 73645491f..5e0ba688e 100644 --- a/crates/typst-library/src/foundations/cast.rs +++ b/crates/typst-library/src/foundations/cast.rs @@ -9,7 +9,7 @@ use std::ops::Add; use ecow::eco_format; use smallvec::SmallVec; -use typst_syntax::{Span, Spanned}; +use typst_syntax::{Span, Spanned, SyntaxMode}; use unicode_math_class::MathClass; use crate::diag::{At, HintedStrResult, HintedString, SourceResult, StrResult}; @@ -459,6 +459,21 @@ impl FromValue for Never { } } +cast! { + SyntaxMode, + self => IntoValue::into_value(match self { + SyntaxMode::Markup => "markup", + SyntaxMode::Math => "math", + SyntaxMode::Code => "code", + }), + /// Evaluate as markup, as in a Typst file. + "markup" => SyntaxMode::Markup, + /// Evaluate as math, as in an equation. + "math" => SyntaxMode::Math, + /// Evaluate as code, as after a hash. + "code" => SyntaxMode::Code, +} + cast! { MathClass, self => IntoValue::into_value(match self { diff --git a/crates/typst-library/src/foundations/mod.rs b/crates/typst-library/src/foundations/mod.rs index d42be15b1..6840f855d 100644 --- a/crates/typst-library/src/foundations/mod.rs +++ b/crates/typst-library/src/foundations/mod.rs @@ -69,6 +69,7 @@ pub use self::ty::*; pub use self::value::*; pub use self::version::*; pub use typst_macros::{scope, ty}; +use typst_syntax::SyntaxMode; #[rustfmt::skip] #[doc(hidden)] @@ -83,7 +84,6 @@ use typst_syntax::Spanned; use crate::diag::{bail, SourceResult, StrResult}; use crate::engine::Engine; -use crate::routines::EvalMode; use crate::{Feature, Features}; /// Hook up all `foundations` definitions. @@ -273,8 +273,8 @@ pub fn eval( /// #eval("1_2^3", mode: "math") /// ``` #[named] - #[default(EvalMode::Code)] - mode: EvalMode, + #[default(SyntaxMode::Code)] + mode: SyntaxMode, /// A scope of definitions that are made available. /// /// ```example diff --git a/crates/typst-library/src/model/bibliography.rs b/crates/typst-library/src/model/bibliography.rs index a85efc810..7bfacfc66 100644 --- a/crates/typst-library/src/model/bibliography.rs +++ b/crates/typst-library/src/model/bibliography.rs @@ -16,7 +16,7 @@ use hayagriva::{ }; use indexmap::IndexMap; use smallvec::{smallvec, SmallVec}; -use typst_syntax::{Span, Spanned}; +use typst_syntax::{Span, Spanned, SyntaxMode}; use typst_utils::{Get, ManuallyHash, NonZeroExt, PicoStr}; use crate::diag::{ @@ -39,7 +39,7 @@ use crate::model::{ CitationForm, CiteGroup, Destination, FootnoteElem, HeadingElem, LinkElem, ParElem, Url, }; -use crate::routines::{EvalMode, Routines}; +use crate::routines::Routines; use crate::text::{ FontStyle, Lang, LocalName, Region, Smallcaps, SubElem, SuperElem, TextElem, WeightDelta, @@ -1024,7 +1024,7 @@ impl ElemRenderer<'_> { Sink::new().track_mut(), math, self.span, - EvalMode::Math, + SyntaxMode::Math, Scope::new(), ) .map(Value::display) diff --git a/crates/typst-library/src/routines.rs b/crates/typst-library/src/routines.rs index 6f0cb32b1..59ce83282 100644 --- a/crates/typst-library/src/routines.rs +++ b/crates/typst-library/src/routines.rs @@ -4,7 +4,7 @@ use std::hash::{Hash, Hasher}; use std::num::NonZeroUsize; use comemo::{Tracked, TrackedMut}; -use typst_syntax::Span; +use typst_syntax::{Span, SyntaxMode}; use typst_utils::LazyHash; use crate::diag::SourceResult; @@ -58,7 +58,7 @@ routines! { sink: TrackedMut, string: &str, span: Span, - mode: EvalMode, + mode: SyntaxMode, scope: Scope, ) -> SourceResult @@ -312,17 +312,6 @@ routines! { ) -> SourceResult } -/// In which mode to evaluate a string. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)] -pub enum EvalMode { - /// Evaluate as code, as after a hash. - Code, - /// Evaluate as markup, like in a Typst file. - Markup, - /// Evaluate as math, as in an equation. - Math, -} - /// Defines what kind of realization we are performing. pub enum RealizationKind<'a> { /// This the root realization for layout. Requires a mutable reference diff --git a/crates/typst-syntax/src/lexer.rs b/crates/typst-syntax/src/lexer.rs index 7d363d7b5..74f14cfeb 100644 --- a/crates/typst-syntax/src/lexer.rs +++ b/crates/typst-syntax/src/lexer.rs @@ -4,7 +4,7 @@ use unicode_script::{Script, UnicodeScript}; use unicode_segmentation::UnicodeSegmentation; use unscanny::Scanner; -use crate::{SyntaxError, SyntaxKind, SyntaxNode}; +use crate::{SyntaxError, SyntaxKind, SyntaxMode, SyntaxNode}; /// An iterator over a source code string which returns tokens. #[derive(Clone)] @@ -13,28 +13,17 @@ pub(super) struct Lexer<'s> { s: Scanner<'s>, /// The mode the lexer is in. This determines which kinds of tokens it /// produces. - mode: LexMode, + mode: SyntaxMode, /// Whether the last token contained a newline. newline: bool, /// An error for the last token. error: Option, } -/// What kind of tokens to emit. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub(super) enum LexMode { - /// Text and markup. - Markup, - /// Math atoms, operators, etc. - Math, - /// Keywords, literals and operators. - Code, -} - impl<'s> Lexer<'s> { /// Create a new lexer with the given mode and a prefix to offset column /// calculations. - pub fn new(text: &'s str, mode: LexMode) -> Self { + pub fn new(text: &'s str, mode: SyntaxMode) -> Self { Self { s: Scanner::new(text), mode, @@ -44,12 +33,12 @@ impl<'s> Lexer<'s> { } /// Get the current lexing mode. - pub fn mode(&self) -> LexMode { + pub fn mode(&self) -> SyntaxMode { self.mode } /// Change the lexing mode. - pub fn set_mode(&mut self, mode: LexMode) { + pub fn set_mode(&mut self, mode: SyntaxMode) { self.mode = mode; } @@ -92,7 +81,7 @@ impl Lexer<'_> { } } -/// Shared methods with all [`LexMode`]. +/// Shared methods with all [`SyntaxMode`]. impl Lexer<'_> { /// Return the next token in our text. Returns both the [`SyntaxNode`] /// and the raw [`SyntaxKind`] to make it more ergonomic to check the kind @@ -114,14 +103,14 @@ impl Lexer<'_> { ); kind } - Some('`') if self.mode != LexMode::Math => return self.raw(), + Some('`') if self.mode != SyntaxMode::Math => return self.raw(), Some(c) => match self.mode { - LexMode::Markup => self.markup(start, c), - LexMode::Math => match self.math(start, c) { + SyntaxMode::Markup => self.markup(start, c), + SyntaxMode::Math => match self.math(start, c) { (kind, None) => kind, (kind, Some(node)) => return (kind, node), }, - LexMode::Code => self.code(start, c), + SyntaxMode::Code => self.code(start, c), }, None => SyntaxKind::End, @@ -145,7 +134,7 @@ impl Lexer<'_> { }; self.newline = newlines > 0; - if self.mode == LexMode::Markup && newlines >= 2 { + if self.mode == SyntaxMode::Markup && newlines >= 2 { SyntaxKind::Parbreak } else { SyntaxKind::Space @@ -965,9 +954,9 @@ impl ScannerExt for Scanner<'_> { /// Whether a character will become a [`SyntaxKind::Space`] token. #[inline] -fn is_space(character: char, mode: LexMode) -> bool { +fn is_space(character: char, mode: SyntaxMode) -> bool { match mode { - LexMode::Markup => matches!(character, ' ' | '\t') || is_newline(character), + SyntaxMode::Markup => matches!(character, ' ' | '\t') || is_newline(character), _ => character.is_whitespace(), } } diff --git a/crates/typst-syntax/src/lib.rs b/crates/typst-syntax/src/lib.rs index 1249f88e9..4741506c5 100644 --- a/crates/typst-syntax/src/lib.rs +++ b/crates/typst-syntax/src/lib.rs @@ -30,5 +30,16 @@ pub use self::path::VirtualPath; pub use self::source::Source; pub use self::span::{Span, Spanned}; -use self::lexer::{LexMode, Lexer}; +use self::lexer::Lexer; use self::parser::{reparse_block, reparse_markup}; + +/// The syntax mode of a portion of Typst code. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub enum SyntaxMode { + /// Text and markup, as in the top level. + Markup, + /// Math atoms, operators, etc., as in equations. + Math, + /// Keywords, literals and operators, as after hashes. + Code, +} diff --git a/crates/typst-syntax/src/parser.rs b/crates/typst-syntax/src/parser.rs index a68815806..b452c2c09 100644 --- a/crates/typst-syntax/src/parser.rs +++ b/crates/typst-syntax/src/parser.rs @@ -7,12 +7,12 @@ use typst_utils::default_math_class; use unicode_math_class::MathClass; use crate::set::{syntax_set, SyntaxSet}; -use crate::{ast, set, LexMode, Lexer, SyntaxError, SyntaxKind, SyntaxNode}; +use crate::{ast, set, Lexer, SyntaxError, SyntaxKind, SyntaxMode, SyntaxNode}; /// Parses a source file as top-level markup. pub fn parse(text: &str) -> SyntaxNode { let _scope = typst_timing::TimingScope::new("parse"); - let mut p = Parser::new(text, 0, LexMode::Markup); + let mut p = Parser::new(text, 0, SyntaxMode::Markup); markup_exprs(&mut p, true, syntax_set!(End)); p.finish_into(SyntaxKind::Markup) } @@ -20,7 +20,7 @@ pub fn parse(text: &str) -> SyntaxNode { /// Parses top-level code. pub fn parse_code(text: &str) -> SyntaxNode { let _scope = typst_timing::TimingScope::new("parse code"); - let mut p = Parser::new(text, 0, LexMode::Code); + let mut p = Parser::new(text, 0, SyntaxMode::Code); code_exprs(&mut p, syntax_set!(End)); p.finish_into(SyntaxKind::Code) } @@ -28,7 +28,7 @@ pub fn parse_code(text: &str) -> SyntaxNode { /// Parses top-level math. pub fn parse_math(text: &str) -> SyntaxNode { let _scope = typst_timing::TimingScope::new("parse math"); - let mut p = Parser::new(text, 0, LexMode::Math); + let mut p = Parser::new(text, 0, SyntaxMode::Math); math_exprs(&mut p, syntax_set!(End)); p.finish_into(SyntaxKind::Math) } @@ -63,7 +63,7 @@ pub(super) fn reparse_markup( nesting: &mut usize, top_level: bool, ) -> Option> { - let mut p = Parser::new(text, range.start, LexMode::Markup); + let mut p = Parser::new(text, range.start, SyntaxMode::Markup); *at_start |= p.had_newline(); while !p.end() && p.current_start() < range.end { // If not top-level and at a new RightBracket, stop the reparse. @@ -205,7 +205,7 @@ fn reference(p: &mut Parser) { /// Parses a mathematical equation: `$x$`, `$ x^2 $`. fn equation(p: &mut Parser) { let m = p.marker(); - p.enter_modes(LexMode::Math, AtNewline::Continue, |p| { + p.enter_modes(SyntaxMode::Math, AtNewline::Continue, |p| { p.assert(SyntaxKind::Dollar); math(p, syntax_set!(Dollar, End)); p.expect_closing_delimiter(m, SyntaxKind::Dollar); @@ -615,7 +615,7 @@ fn code_exprs(p: &mut Parser, stop_set: SyntaxSet) { /// Parses an atomic code expression embedded in markup or math. fn embedded_code_expr(p: &mut Parser) { - p.enter_modes(LexMode::Code, AtNewline::Stop, |p| { + p.enter_modes(SyntaxMode::Code, AtNewline::Stop, |p| { p.assert(SyntaxKind::Hash); if p.had_trivia() || p.end() { p.expected("expression"); @@ -777,7 +777,7 @@ fn code_primary(p: &mut Parser, atomic: bool) { /// Reparses a full content or code block. pub(super) fn reparse_block(text: &str, range: Range) -> Option { - let mut p = Parser::new(text, range.start, LexMode::Code); + let mut p = Parser::new(text, range.start, SyntaxMode::Code); assert!(p.at(SyntaxKind::LeftBracket) || p.at(SyntaxKind::LeftBrace)); block(&mut p); (p.balanced && p.prev_end() == range.end) @@ -796,7 +796,7 @@ fn block(p: &mut Parser) { /// Parses a code block: `{ let x = 1; x + 2 }`. fn code_block(p: &mut Parser) { let m = p.marker(); - p.enter_modes(LexMode::Code, AtNewline::Continue, |p| { + p.enter_modes(SyntaxMode::Code, AtNewline::Continue, |p| { p.assert(SyntaxKind::LeftBrace); code(p, syntax_set!(RightBrace, RightBracket, RightParen, End)); p.expect_closing_delimiter(m, SyntaxKind::RightBrace); @@ -807,7 +807,7 @@ fn code_block(p: &mut Parser) { /// Parses a content block: `[*Hi* there!]`. fn content_block(p: &mut Parser) { let m = p.marker(); - p.enter_modes(LexMode::Markup, AtNewline::Continue, |p| { + p.enter_modes(SyntaxMode::Markup, AtNewline::Continue, |p| { p.assert(SyntaxKind::LeftBracket); markup(p, true, true, syntax_set!(RightBracket, End)); p.expect_closing_delimiter(m, SyntaxKind::RightBracket); @@ -1516,10 +1516,10 @@ fn pattern_leaf<'s>( /// ### Modes /// /// The parser manages the transitions between the three modes of Typst through -/// [lexer modes](`LexMode`) and [newline modes](`AtNewline`). +/// [syntax modes](`SyntaxMode`) and [newline modes](`AtNewline`). /// -/// The lexer modes map to the three Typst modes and are stored in the lexer, -/// changing which`SyntaxKind`s it will generate. +/// The syntax modes map to the three Typst modes and are stored in the lexer, +/// changing which `SyntaxKind`s it will generate. /// /// The newline mode is used to determine whether a newline should end the /// current expression. If so, the parser temporarily changes `token`'s kind to @@ -1529,7 +1529,7 @@ struct Parser<'s> { /// The source text shared with the lexer. text: &'s str, /// A lexer over the source text with multiple modes. Defines the boundaries - /// of tokens and determines their [`SyntaxKind`]. Contains the [`LexMode`] + /// of tokens and determines their [`SyntaxKind`]. Contains the [`SyntaxMode`] /// defining our current Typst mode. lexer: Lexer<'s>, /// The newline mode: whether to insert a temporary end at newlines. @@ -1612,7 +1612,7 @@ impl AtNewline { AtNewline::RequireColumn(min_col) => { // When the column is `None`, the newline doesn't start a // column, and we continue parsing. This may happen on the - // boundary of lexer modes, since we only report a column in + // boundary of syntax modes, since we only report a column in // Markup. column.is_some_and(|column| column <= min_col) } @@ -1643,8 +1643,8 @@ impl IndexMut for Parser<'_> { /// Creating/Consuming the parser and getting info about the current token. impl<'s> Parser<'s> { - /// Create a new parser starting from the given text offset and lexer mode. - fn new(text: &'s str, offset: usize, mode: LexMode) -> Self { + /// Create a new parser starting from the given text offset and syntax mode. + fn new(text: &'s str, offset: usize, mode: SyntaxMode) -> Self { let mut lexer = Lexer::new(text, mode); lexer.jump(offset); let nl_mode = AtNewline::Continue; @@ -1825,13 +1825,13 @@ impl<'s> Parser<'s> { self.nodes.insert(from, SyntaxNode::inner(kind, children)); } - /// Parse within the [`LexMode`] for subsequent tokens (does not change the + /// Parse within the [`SyntaxMode`] for subsequent tokens (does not change the /// current token). This may re-lex the final token on exit. /// /// This function effectively repurposes the call stack as a stack of modes. fn enter_modes( &mut self, - mode: LexMode, + mode: SyntaxMode, stop: AtNewline, func: impl FnOnce(&mut Parser<'s>), ) { @@ -1891,7 +1891,8 @@ impl<'s> Parser<'s> { } let newline = if had_newline { - let column = (lexer.mode() == LexMode::Markup).then(|| lexer.column(start)); + let column = + (lexer.mode() == SyntaxMode::Markup).then(|| lexer.column(start)); let newline = Newline { column, parbreak }; if nl_mode.stop_at(newline, kind) { // Insert a temporary `SyntaxKind::End` to halt the parser. @@ -1938,7 +1939,7 @@ struct Checkpoint { #[derive(Clone)] struct PartialState { cursor: usize, - lex_mode: LexMode, + lex_mode: SyntaxMode, token: Token, }