From be49935753f0e37ae8e04fb53111e6f116c63f47 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Wed, 21 Feb 2024 09:38:47 +0100 Subject: [PATCH] Destructuring improvements (#3463) --- crates/typst-ide/src/complete.rs | 4 +- crates/typst-syntax/src/ast.rs | 240 +++--- crates/typst-syntax/src/kind.rs | 2 +- crates/typst-syntax/src/node.rs | 37 +- crates/typst-syntax/src/parser.rs | 1136 +++++++++++++------------- crates/typst-syntax/src/set.rs | 226 ++--- crates/typst/src/eval/binding.rs | 97 ++- crates/typst/src/eval/call.rs | 28 +- crates/typst/src/eval/code.rs | 9 +- tests/src/tests.rs | 2 +- tests/typ/bugs/parenthesized.typ | 86 ++ tests/typ/compiler/backtracking.typ | 33 + tests/typ/compiler/block.typ | 3 +- tests/typ/compiler/call.typ | 3 +- tests/typ/compiler/closure.typ | 28 +- tests/typ/compiler/dict.typ | 3 +- tests/typ/compiler/embedded-expr.typ | 4 +- tests/typ/compiler/for.typ | 23 +- tests/typ/compiler/let.typ | 54 +- tests/typ/compiler/ops.typ | 32 + tests/typ/compiler/spread.typ | 8 +- tests/typ/compute/foundations.typ | 2 +- 22 files changed, 1132 insertions(+), 928 deletions(-) create mode 100644 tests/typ/bugs/parenthesized.typ create mode 100644 tests/typ/compiler/backtracking.typ diff --git a/crates/typst-ide/src/complete.rs b/crates/typst-ide/src/complete.rs index 8acf1540b..b58a9bcc1 100644 --- a/crates/typst-ide/src/complete.rs +++ b/crates/typst-ide/src/complete.rs @@ -1285,7 +1285,7 @@ impl<'a> CompletionContext<'a> { let mut sibling = Some(node.clone()); while let Some(node) = &sibling { if let Some(v) = node.cast::() { - for ident in v.kind().idents() { + for ident in v.kind().bindings() { defined.insert(ident.get().clone()); } } @@ -1323,7 +1323,7 @@ impl<'a> CompletionContext<'a> { if let Some(v) = parent.cast::() { if node.prev_sibling_kind() != Some(SyntaxKind::In) { let pattern = v.pattern(); - for ident in pattern.idents() { + for ident in pattern.bindings() { defined.insert(ident.get().clone()); } } diff --git 
a/crates/typst-syntax/src/ast.rs b/crates/typst-syntax/src/ast.rs index 6f4b52f02..6dd9b5f6f 100644 --- a/crates/typst-syntax/src/ast.rs +++ b/crates/typst-syntax/src/ast.rs @@ -1157,9 +1157,18 @@ node! { impl<'a> Parenthesized<'a> { /// The wrapped expression. + /// + /// Should only be accessed if this is contained in an `Expr`. pub fn expr(self) -> Expr<'a> { self.0.cast_first_match().unwrap_or_default() } + + /// The wrapped pattern. + /// + /// Should only be accessed if this is contained in a `Pattern`. + pub fn pattern(self) -> Pattern<'a> { + self.0.cast_first_match().unwrap_or_default() + } } node! { @@ -1180,13 +1189,13 @@ pub enum ArrayItem<'a> { /// A bare expression: `12`. Pos(Expr<'a>), /// A spread expression: `..things`. - Spread(Expr<'a>), + Spread(Spread<'a>), } impl<'a> AstNode<'a> for ArrayItem<'a> { fn from_untyped(node: &'a SyntaxNode) -> Option { match node.kind() { - SyntaxKind::Spread => node.cast_first_match().map(Self::Spread), + SyntaxKind::Spread => node.cast().map(Self::Spread), _ => node.cast().map(Self::Pos), } } @@ -1219,7 +1228,7 @@ pub enum DictItem<'a> { /// A keyed pair: `"spacy key": true`. Keyed(Keyed<'a>), /// A spread expression: `..things`. - Spread(Expr<'a>), + Spread(Spread<'a>), } impl<'a> AstNode<'a> for DictItem<'a> { @@ -1227,7 +1236,7 @@ impl<'a> AstNode<'a> for DictItem<'a> { match node.kind() { SyntaxKind::Named => node.cast().map(Self::Named), SyntaxKind::Keyed => node.cast().map(Self::Keyed), - SyntaxKind::Spread => node.cast_first_match().map(Self::Spread), + SyntaxKind::Spread => node.cast().map(Self::Spread), _ => Option::None, } } @@ -1253,13 +1262,19 @@ impl<'a> Named<'a> { } /// The right-hand side of the pair: `3pt`. + /// + /// This should only be accessed if this `Named` is contained in a + /// `DictItem`, `Arg`, or `Param`. pub fn expr(self) -> Expr<'a> { self.0.cast_last_match().unwrap_or_default() } - /// The right-hand side of the pair as an identifier. 
- pub fn expr_ident(self) -> Option> { - self.0.cast_last_match() + /// The right-hand side of the pair as a pattern. + /// + /// This should only be accessed if this `Named` is contained in a + /// `Destructuring`. + pub fn pattern(self) -> Pattern<'a> { + self.0.cast_last_match().unwrap_or_default() } } @@ -1275,11 +1290,45 @@ impl<'a> Keyed<'a> { } /// The right-hand side of the pair: `true`. + /// + /// This should only be accessed if this `Keyed` is contained in a + /// `DictItem`. pub fn expr(self) -> Expr<'a> { self.0.cast_last_match().unwrap_or_default() } } +node! { + /// A spread: `..x` or `..x.at(0)`. + Spread +} + +impl<'a> Spread<'a> { + /// The spreaded expression. + /// + /// This should only be accessed if this `Spread` is contained in an + /// `ArrayItem`, `DictItem`, or `Arg`. + pub fn expr(self) -> Expr<'a> { + self.0.cast_first_match().unwrap_or_default() + } + + /// The sink identifier, if present. + /// + /// This should only be accessed if this `Spread` is contained in a + /// `Param` or binding `DestructuringItem`. + pub fn sink_ident(self) -> Option> { + self.0.cast_first_match() + } + + /// The sink expressions, if present. + /// + /// This should only be accessed if this `Spread` is contained in a + /// `DestructuringItem`. + pub fn sink_expr(self) -> Option> { + self.0.cast_first_match() + } +} + node! { /// A unary operation: `-x`. Unary @@ -1591,14 +1640,14 @@ pub enum Arg<'a> { /// A named argument: `draw: false`. Named(Named<'a>), /// A spread argument: `..things`. - Spread(Expr<'a>), + Spread(Spread<'a>), } impl<'a> AstNode<'a> for Arg<'a> { fn from_untyped(node: &'a SyntaxNode) -> Option { match node.kind() { SyntaxKind::Named => node.cast().map(Self::Named), - SyntaxKind::Spread => node.cast_first_match().map(Self::Spread), + SyntaxKind::Spread => node.cast().map(Self::Spread), _ => node.cast().map(Self::Pos), } } @@ -1648,28 +1697,6 @@ impl<'a> Params<'a> { } } -node! { - /// A spread: `..x` or `..x.at(0)`. 
- Spread -} - -impl<'a> Spread<'a> { - /// Try to get an identifier. - pub fn name(self) -> Option> { - self.0.cast_first_match() - } - - /// Try to get an expression. - pub fn expr(self) -> Option> { - self.0.cast_first_match() - } -} - -node! { - /// An underscore: `_` - Underscore -} - /// A parameter to a closure. #[derive(Debug, Copy, Clone, Hash)] pub enum Param<'a> { @@ -1677,15 +1704,15 @@ pub enum Param<'a> { Pos(Pattern<'a>), /// A named parameter with a default value: `draw: false`. Named(Named<'a>), - /// An argument sink: `..args`. - Sink(Spread<'a>), + /// An argument sink: `..args` or `..`. + Spread(Spread<'a>), } impl<'a> AstNode<'a> for Param<'a> { fn from_untyped(node: &'a SyntaxNode) -> Option { match node.kind() { SyntaxKind::Named => node.cast().map(Self::Named), - SyntaxKind::Spread => node.cast().map(Self::Sink), + SyntaxKind::Spread => node.cast().map(Self::Spread), _ => node.cast().map(Self::Pos), } } @@ -1694,62 +1721,7 @@ impl<'a> AstNode<'a> for Param<'a> { match self { Self::Pos(v) => v.to_untyped(), Self::Named(v) => v.to_untyped(), - Self::Sink(v) => v.to_untyped(), - } - } -} - -node! { - /// A destructuring pattern: `x` or `(x, _, ..y)`. - Destructuring -} - -impl<'a> Destructuring<'a> { - /// The bindings of the destructuring. - pub fn bindings(self) -> impl DoubleEndedIterator> { - self.0.children().filter_map(SyntaxNode::cast) - } - - /// Returns a list of all identifiers in the pattern. - pub fn idents(self) -> impl DoubleEndedIterator> { - self.bindings().filter_map(|binding| match binding { - DestructuringKind::Normal(Expr::Ident(ident)) => Some(ident), - DestructuringKind::Sink(spread) => spread.name(), - DestructuringKind::Named(named) => named.expr_ident(), - _ => Option::None, - }) - } -} - -/// The kind of an element in a destructuring pattern. -#[derive(Debug, Copy, Clone, Hash)] -pub enum DestructuringKind<'a> { - /// An expression: `x`. - Normal(Expr<'a>), - /// An argument sink: `..y`. 
- Sink(Spread<'a>), - /// Named arguments: `x: 1`. - Named(Named<'a>), - /// A placeholder: `_`. - Placeholder(Underscore<'a>), -} - -impl<'a> AstNode<'a> for DestructuringKind<'a> { - fn from_untyped(node: &'a SyntaxNode) -> Option { - match node.kind() { - SyntaxKind::Named => node.cast().map(Self::Named), - SyntaxKind::Spread => node.cast().map(Self::Sink), - SyntaxKind::Underscore => node.cast().map(Self::Placeholder), - _ => node.cast().map(Self::Normal), - } - } - - fn to_untyped(self) -> &'a SyntaxNode { - match self { - Self::Normal(v) => v.to_untyped(), - Self::Named(v) => v.to_untyped(), - Self::Sink(v) => v.to_untyped(), - Self::Placeholder(v) => v.to_untyped(), + Self::Spread(v) => v.to_untyped(), } } } @@ -1761,6 +1733,8 @@ pub enum Pattern<'a> { Normal(Expr<'a>), /// A placeholder: `_`. Placeholder(Underscore<'a>), + /// A parenthesized pattern. + Parenthesized(Parenthesized<'a>), /// A destructuring pattern: `(x, _, ..y)`. Destructuring(Destructuring<'a>), } @@ -1768,8 +1742,9 @@ pub enum Pattern<'a> { impl<'a> AstNode<'a> for Pattern<'a> { fn from_untyped(node: &'a SyntaxNode) -> Option { match node.kind() { - SyntaxKind::Destructuring => node.cast().map(Self::Destructuring), SyntaxKind::Underscore => node.cast().map(Self::Placeholder), + SyntaxKind::Parenthesized => node.cast().map(Self::Parenthesized), + SyntaxKind::Destructuring => node.cast().map(Self::Destructuring), _ => node.cast().map(Self::Normal), } } @@ -1777,18 +1752,20 @@ impl<'a> AstNode<'a> for Pattern<'a> { fn to_untyped(self) -> &'a SyntaxNode { match self { Self::Normal(v) => v.to_untyped(), - Self::Destructuring(v) => v.to_untyped(), Self::Placeholder(v) => v.to_untyped(), + Self::Parenthesized(v) => v.to_untyped(), + Self::Destructuring(v) => v.to_untyped(), } } } impl<'a> Pattern<'a> { - /// Returns a list of all identifiers in the pattern. - pub fn idents(self) -> Vec> { + /// Returns a list of all new bindings introduced by the pattern. 
+ pub fn bindings(self) -> Vec> { match self { - Pattern::Normal(Expr::Ident(ident)) => vec![ident], - Pattern::Destructuring(destruct) => destruct.idents().collect(), + Self::Normal(Expr::Ident(ident)) => vec![ident], + Self::Parenthesized(v) => v.pattern().bindings(), + Self::Destructuring(v) => v.bindings(), _ => vec![], } } @@ -1800,6 +1777,65 @@ impl Default for Pattern<'_> { } } +node! { + /// An underscore: `_` + Underscore +} + +node! { + /// A destructuring pattern: `x` or `(x, _, ..y)`. + Destructuring +} + +impl<'a> Destructuring<'a> { + /// The items of the destructuring. + pub fn items(self) -> impl DoubleEndedIterator> { + self.0.children().filter_map(SyntaxNode::cast) + } + + /// Returns a list of all new bindings introduced by the destructuring. + pub fn bindings(self) -> Vec> { + self.items() + .flat_map(|binding| match binding { + DestructuringItem::Pattern(pattern) => pattern.bindings(), + DestructuringItem::Named(named) => named.pattern().bindings(), + DestructuringItem::Spread(spread) => { + spread.sink_ident().into_iter().collect() + } + }) + .collect() + } +} + +/// The kind of an element in a destructuring pattern. +#[derive(Debug, Copy, Clone, Hash)] +pub enum DestructuringItem<'a> { + /// A sub-pattern: `x`. + Pattern(Pattern<'a>), + /// A renamed destructuring: `x: y`. + Named(Named<'a>), + /// A destructuring sink: `..y` or `..`. + Spread(Spread<'a>), +} + +impl<'a> AstNode<'a> for DestructuringItem<'a> { + fn from_untyped(node: &'a SyntaxNode) -> Option { + match node.kind() { + SyntaxKind::Named => node.cast().map(Self::Named), + SyntaxKind::Spread => node.cast().map(Self::Spread), + _ => node.cast().map(Self::Pattern), + } + } + + fn to_untyped(self) -> &'a SyntaxNode { + match self { + Self::Pattern(v) => v.to_untyped(), + Self::Named(v) => v.to_untyped(), + Self::Spread(v) => v.to_untyped(), + } + } +} + node! { /// A let binding: `let x = 1`. 
LetBinding @@ -1815,13 +1851,11 @@ pub enum LetBindingKind<'a> { } impl<'a> LetBindingKind<'a> { - /// Returns a list of all identifiers in the pattern. - pub fn idents(self) -> Vec> { + /// Returns a list of all new bindings introduced by the let binding. + pub fn bindings(self) -> Vec> { match self { - LetBindingKind::Normal(pattern) => pattern.idents(), - LetBindingKind::Closure(ident) => { - vec![ident] - } + LetBindingKind::Normal(pattern) => pattern.bindings(), + LetBindingKind::Closure(ident) => vec![ident], } } } @@ -1840,7 +1874,7 @@ impl<'a> LetBinding<'a> { /// The expression the binding is initialized with. pub fn init(self) -> Option> { match self.kind() { - LetBindingKind::Normal(Pattern::Normal(_)) => { + LetBindingKind::Normal(Pattern::Normal(_) | Pattern::Parenthesized(_)) => { self.0.children().filter_map(SyntaxNode::cast).nth(1) } LetBindingKind::Normal(_) => self.0.cast_first_match(), diff --git a/crates/typst-syntax/src/kind.rs b/crates/typst-syntax/src/kind.rs index a772175e5..536c9381b 100644 --- a/crates/typst-syntax/src/kind.rs +++ b/crates/typst-syntax/src/kind.rs @@ -136,7 +136,7 @@ pub enum SyntaxKind { StarEq, /// The divide-assign operator: `/=`. SlashEq, - /// The spread operator: `..`. + /// Indicates a spread or sink: `..`. Dots, /// An arrow between a closure's parameters and body: `=>`. Arrow, diff --git a/crates/typst-syntax/src/node.rs b/crates/typst-syntax/src/node.rs index fed7049ca..3c93cd847 100644 --- a/crates/typst-syntax/src/node.rs +++ b/crates/typst-syntax/src/node.rs @@ -3,7 +3,7 @@ use std::ops::{Deref, Range}; use std::rc::Rc; use std::sync::Arc; -use ecow::{eco_vec, EcoString, EcoVec}; +use ecow::{eco_format, eco_vec, EcoString, EcoVec}; use crate::ast::AstNode; use crate::{FileId, Span, SyntaxKind}; @@ -177,14 +177,9 @@ impl SyntaxNode { } impl SyntaxNode { - /// Mark this node as erroneous. 
- pub(super) fn make_erroneous(&mut self) { - if let Repr::Inner(inner) = &mut self.0 { - Arc::make_mut(inner).erroneous = true; - } - } - /// Convert the child to another kind. + /// + /// Don't use this for converting to an error! #[track_caller] pub(super) fn convert_to_kind(&mut self, kind: SyntaxKind) { debug_assert!(!kind.is_error()); @@ -195,10 +190,30 @@ impl SyntaxNode { } } - /// Convert the child to an error. + /// Convert the child to an error, if it isn't already one. pub(super) fn convert_to_error(&mut self, message: impl Into) { - let text = std::mem::take(self).into_text(); - *self = SyntaxNode::error(message, text); + if !self.kind().is_error() { + let text = std::mem::take(self).into_text(); + *self = SyntaxNode::error(message, text); + } + } + + /// Convert the child to an error stating that the given thing was + /// expected, but the current kind was found. + pub(super) fn expected(&mut self, expected: &str) { + let kind = self.kind(); + self.convert_to_error(eco_format!("expected {expected}, found {}", kind.name())); + if kind.is_keyword() && matches!(expected, "identifier" | "pattern") { + self.hint(eco_format!( + "keyword `{text}` is not allowed as an identifier; try `{text}_` instead", + text = self.text(), + )); + } + } + + /// Convert the child to an error stating it was unexpected. + pub(super) fn unexpected(&mut self) { + self.convert_to_error(eco_format!("unexpected {}", self.kind().name())); } /// Assign spans to each node. 
diff --git a/crates/typst-syntax/src/parser.rs b/crates/typst-syntax/src/parser.rs index a1bd5ad0d..32e15cb77 100644 --- a/crates/typst-syntax/src/parser.rs +++ b/crates/typst-syntax/src/parser.rs @@ -1,5 +1,6 @@ -use std::collections::HashSet; -use std::ops::Range; +use std::collections::{HashMap, HashSet}; +use std::mem; +use std::ops::{Index, IndexMut, Range}; use ecow::{eco_format, EcoString}; use unicode_math_class::MathClass; @@ -145,11 +146,10 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) { /// Parses strong content: `*Strong*`. fn strong(p: &mut Parser) { - const END: SyntaxSet = SyntaxSet::new(&[ - SyntaxKind::Star, - SyntaxKind::Parbreak, - SyntaxKind::RightBracket, - ]); + const END: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Star) + .add(SyntaxKind::Parbreak) + .add(SyntaxKind::RightBracket); let m = p.marker(); p.assert(SyntaxKind::Star); @@ -160,11 +160,10 @@ fn strong(p: &mut Parser) { /// Parses emphasized content: `_Emphasized_`. fn emph(p: &mut Parser) { - const END: SyntaxSet = SyntaxSet::new(&[ - SyntaxKind::Underscore, - SyntaxKind::Parbreak, - SyntaxKind::RightBracket, - ]); + const END: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Underscore) + .add(SyntaxKind::Parbreak) + .add(SyntaxKind::RightBracket); let m = p.marker(); p.assert(SyntaxKind::Underscore); @@ -175,15 +174,16 @@ fn emph(p: &mut Parser) { /// Parses a section heading: `= Introduction`. 
fn heading(p: &mut Parser) { - const END: SyntaxSet = - SyntaxSet::new(&[SyntaxKind::Label, SyntaxKind::RightBracket, SyntaxKind::Space]); + const END: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Label) + .add(SyntaxKind::RightBracket) + .add(SyntaxKind::Space); let m = p.marker(); p.assert(SyntaxKind::HeadingMarker); whitespace_line(p); markup(p, false, usize::MAX, |p| { - p.at_set(END) - && (!p.at(SyntaxKind::Space) || p.lexer.clone().next() == SyntaxKind::Label) + p.at_set(END) && (!p.at(SyntaxKind::Space) || p.peek() == SyntaxKind::Label) }); p.wrap(m, SyntaxKind::Heading); } @@ -211,7 +211,7 @@ fn enum_item(p: &mut Parser) { /// Parses an item in a term list: `/ Term: Details`. fn term_item(p: &mut Parser) { const TERM_END: SyntaxSet = - SyntaxSet::new(&[SyntaxKind::Colon, SyntaxKind::RightBracket]); + SyntaxSet::new().add(SyntaxKind::Colon).add(SyntaxKind::RightBracket); let m = p.marker(); p.assert(SyntaxKind::TermMarker); @@ -615,11 +615,7 @@ fn code_exprs(p: &mut Parser, mut stop: impl FnMut(&Parser) -> bool) { /// Parses a single code expression. fn code_expr(p: &mut Parser) { - code_expr_prec(p, false, 0, false) -} - -fn code_expr_or_pattern(p: &mut Parser) { - code_expr_prec(p, false, 0, true) + code_expr_prec(p, false, 0) } /// Parses a code expression embedded in markup or math. @@ -631,7 +627,7 @@ fn embedded_code_expr(p: &mut Parser) { let stmt = p.at_set(set::STMT); let at = p.at_set(set::ATOMIC_CODE_EXPR); - code_expr_prec(p, true, 0, false); + code_expr_prec(p, true, 0); // Consume error for things like `#12p` or `#"abc\"`.# if !at && !p.current().is_trivia() && !p.eof() { @@ -650,20 +646,15 @@ fn embedded_code_expr(p: &mut Parser) { } /// Parses a code expression with at least the given precedence. 
-fn code_expr_prec( - p: &mut Parser, - atomic: bool, - min_prec: usize, - allow_destructuring: bool, -) { +fn code_expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) { let m = p.marker(); if !atomic && p.at_set(set::UNARY_OP) { let op = ast::UnOp::from_kind(p.current()).unwrap(); p.eat(); - code_expr_prec(p, atomic, op.precedence(), false); + code_expr_prec(p, atomic, op.precedence()); p.wrap(m, SyntaxKind::Unary); } else { - code_primary(p, atomic, allow_destructuring); + code_primary(p, atomic); } loop { @@ -675,7 +666,7 @@ fn code_expr_prec( } let at_field_or_method = - p.directly_at(SyntaxKind::Dot) && p.lexer.clone().next() == SyntaxKind::Ident; + p.directly_at(SyntaxKind::Dot) && p.peek() == SyntaxKind::Ident; if atomic && !at_field_or_method { break; @@ -713,7 +704,7 @@ fn code_expr_prec( } p.eat(); - code_expr_prec(p, false, prec, false); + code_expr_prec(p, false, prec); p.wrap(m, SyntaxKind::Binary); continue; } @@ -725,7 +716,7 @@ fn code_expr_prec( /// Parses an primary in a code expression. These are the atoms that unary and /// binary operations, functions calls, and field accesses start with / are /// composed of. 
-fn code_primary(p: &mut Parser, atomic: bool, allow_destructuring: bool) { +fn code_primary(p: &mut Parser, atomic: bool) { let m = p.marker(); match p.current() { SyntaxKind::Ident => { @@ -744,14 +735,17 @@ fn code_primary(p: &mut Parser, atomic: bool, allow_destructuring: bool) { p.eat(); code_expr(p); p.wrap(m, SyntaxKind::Closure); - } else if let Some(underscore) = p.node_mut(m) { - underscore.convert_to_error("expected expression, found underscore"); + } else if p.eat_if(SyntaxKind::Eq) { + code_expr(p); + p.wrap(m, SyntaxKind::DestructAssignment); + } else { + p[m].expected("expression"); } } SyntaxKind::LeftBrace => code_block(p), SyntaxKind::LeftBracket => content_block(p), - SyntaxKind::LeftParen => with_paren(p, allow_destructuring), + SyntaxKind::LeftParen => expr_with_paren(p), SyntaxKind::Dollar => equation(p), SyntaxKind::Let => let_binding(p), SyntaxKind::Set => set_rule(p), @@ -799,11 +793,10 @@ pub(super) fn reparse_block(text: &str, range: Range) -> Option validate_array_at(p, m), - SyntaxKind::Dict if !allow_destructuring => validate_dict_at(p, m), - SyntaxKind::Parenthesized if !allow_destructuring => { - validate_parenthesized_at(p, m) - } - SyntaxKind::Destructuring if !allow_destructuring => { - invalidate_destructuring(p, m) - } - _ => {} - } - p.wrap(m, kind); -} - -fn invalidate_destructuring(p: &mut Parser, m: Marker) { - let mut collection_kind = Option::None; - for child in p.post_process(m) { - match child.kind() { - SyntaxKind::Named | SyntaxKind::Keyed => match collection_kind { - Some(SyntaxKind::Array) => child.convert_to_error(eco_format!( - "expected expression, found {}", - child.kind().name() - )), - _ => collection_kind = Some(SyntaxKind::Dict), - }, - SyntaxKind::LeftParen | SyntaxKind::RightParen | SyntaxKind::Comma => {} - kind => match collection_kind { - Some(SyntaxKind::Dict) => child.convert_to_error(eco_format!( - "expected named or keyed pair, found {}", - kind.name() - )), - _ => collection_kind = 
Some(SyntaxKind::Array), - }, - } - } -} - -fn collection(p: &mut Parser, keyed: bool) -> SyntaxKind { - p.enter_newline_mode(NewlineMode::Continue); - - let m = p.marker(); - p.assert(SyntaxKind::LeftParen); - - let mut count = 0; - let mut parenthesized = true; - let mut kind = None; - if keyed && p.eat_if(SyntaxKind::Colon) { - kind = Some(SyntaxKind::Dict); - parenthesized = false; - } - - while !p.current().is_terminator() { - let prev = p.prev_end(); - match item(p, keyed) { - SyntaxKind::Spread => parenthesized = false, - SyntaxKind::Named | SyntaxKind::Keyed => { - match kind { - Some(SyntaxKind::Array) => kind = Some(SyntaxKind::Destructuring), - _ => kind = Some(SyntaxKind::Dict), - } - parenthesized = false; - } - SyntaxKind::Int => match kind { - Some(SyntaxKind::Array) | None => kind = Some(SyntaxKind::Array), - Some(_) => kind = Some(SyntaxKind::Destructuring), - }, - _ if kind.is_none() => kind = Some(SyntaxKind::Array), - _ => {} - } - - if !p.progress(prev) { - p.unexpected(); - continue; - } - - count += 1; - - if p.current().is_terminator() { - break; - } - - if p.expect(SyntaxKind::Comma) { - parenthesized = false; - } - } - - p.expect_closing_delimiter(m, SyntaxKind::RightParen); - p.exit_newline_mode(); - - if parenthesized && count == 1 { - SyntaxKind::Parenthesized - } else { - kind.unwrap_or(SyntaxKind::Array) - } -} - -fn item(p: &mut Parser, keyed: bool) -> SyntaxKind { - let m = p.marker(); - - if p.eat_if(SyntaxKind::Dots) { - if p.at(SyntaxKind::Comma) || p.at(SyntaxKind::RightParen) { - p.wrap(m, SyntaxKind::Spread); - return SyntaxKind::Spread; - } - - code_expr(p); - p.wrap(m, SyntaxKind::Spread); - return SyntaxKind::Spread; - } - - if p.at(SyntaxKind::Underscore) { - // This is a temporary workaround to fix `v.map(_ => {})`. 
- let mut lexer = p.lexer.clone(); - let next = - std::iter::from_fn(|| Some(lexer.next())).find(|kind| !kind.is_trivia()); - if next != Some(SyntaxKind::Arrow) { - p.eat(); - return SyntaxKind::Underscore; - } - } - - code_expr_or_pattern(p); - - if !p.eat_if(SyntaxKind::Colon) { - return SyntaxKind::Int; - } - - if !p.eat_if(SyntaxKind::Underscore) { - code_expr(p); - } - - let kind = match p.node(m).map(SyntaxNode::kind) { - Some(SyntaxKind::Ident) => SyntaxKind::Named, - Some(_) if keyed => SyntaxKind::Keyed, - _ => { - for child in p.post_process(m) { - if child.kind() == SyntaxKind::Colon { - break; - } - - let expected = if keyed { "expression" } else { "identifier" }; - let message = eco_format!( - "expected {expected}, found {found}", - found = child.kind().name(), - ); - child.convert_to_error(message); - } - SyntaxKind::Named - } - }; - - p.wrap(m, kind); - kind -} - -/// Parses a function call's argument list: `(12pt, y)`. -fn args(p: &mut Parser) { - if !p.at(SyntaxKind::LeftParen) && !p.at(SyntaxKind::LeftBracket) { - p.expected("argument list"); - } - - let m = p.marker(); - if p.at(SyntaxKind::LeftParen) { - collection(p, false); - validate_args_at(p, m); - } - - while p.directly_at(SyntaxKind::LeftBracket) { - content_block(p); - } - - p.wrap(m, SyntaxKind::Args); -} - -enum PatternKind { - Ident, - Other, -} - -/// Parses a pattern that can be assigned to. -fn pattern(p: &mut Parser) -> PatternKind { - let m = p.marker(); - if p.at(SyntaxKind::LeftParen) { - let kind = collection(p, false); - validate_pattern_at(p, m, true); - - if kind != SyntaxKind::Parenthesized { - p.wrap(m, SyntaxKind::Destructuring); - } - PatternKind::Other - } else if p.eat_if(SyntaxKind::Underscore) { - PatternKind::Other - } else { - p.expect(SyntaxKind::Ident); - PatternKind::Ident - } -} - /// Parses a let binding: `let x = 1`. 
fn let_binding(p: &mut Parser) { let m = p.marker(); @@ -1052,17 +828,15 @@ fn let_binding(p: &mut Parser) { let m2 = p.marker(); let mut closure = false; let mut other = false; - match pattern(p) { - PatternKind::Ident => { - if p.directly_at(SyntaxKind::LeftParen) { - let m3 = p.marker(); - collection(p, false); - validate_params_at(p, m3); - p.wrap(m3, SyntaxKind::Params); - closure = true; - } + + if p.eat_if(SyntaxKind::Ident) { + if p.directly_at(SyntaxKind::LeftParen) { + params(p); + closure = true; } - PatternKind::Other => other = true, + } else { + pattern(p, false, &mut HashSet::new(), None); + other = true; } let f = if closure || other { Parser::expect } else { Parser::eat_if }; @@ -1144,17 +918,21 @@ fn while_loop(p: &mut Parser) { fn for_loop(p: &mut Parser) { let m = p.marker(); p.assert(SyntaxKind::For); - pattern(p); - if p.at(SyntaxKind::Comma) { - p.expected("keyword `in`"); - p.hint("did you mean to use a destructuring pattern?"); - if !p.eat_if(SyntaxKind::Ident) { - p.eat_if(SyntaxKind::Underscore); + + let mut seen = HashSet::new(); + pattern(p, false, &mut seen, None); + + let m2 = p.marker(); + if p.eat_if(SyntaxKind::Comma) { + let node = &mut p[m2]; + node.unexpected(); + node.hint("destructuring patterns must be wrapped in parentheses"); + if p.at_set(set::PATTERN) { + pattern(p, false, &mut seen, None); } - p.eat_if(SyntaxKind::In); - } else { - p.expect(SyntaxKind::In); } + + p.expect(SyntaxKind::In); code_expr(p); block(p); p.wrap(m, SyntaxKind::ForLoop); @@ -1192,10 +970,9 @@ fn import_items(p: &mut Parser) { p.wrap(item_marker, SyntaxKind::RenamedImportItem); } - if p.current().is_terminator() { - break; + if !p.current().is_terminator() { + p.expect(SyntaxKind::Comma); } - p.expect(SyntaxKind::Comma); } p.wrap(m, SyntaxKind::ImportItems); } @@ -1232,247 +1009,426 @@ fn return_stmt(p: &mut Parser) { p.wrap(m, SyntaxKind::FuncReturn); } -fn validate_parenthesized_at(p: &mut Parser, m: Marker) { - for child in p.post_process(m) { - 
let kind = child.kind(); - match kind { - SyntaxKind::Array => validate_array(child.children_mut().iter_mut()), - SyntaxKind::Dict => validate_dict(child.children_mut().iter_mut()), - SyntaxKind::Underscore => { - child.convert_to_error(eco_format!( - "expected expression, found {}", - kind.name() - )); +/// An expression that starts with a parenthesis. +fn expr_with_paren(p: &mut Parser) { + // If we've seen this position before and have a memoized result, just use + // it. See below for more explanation about this memoization. + let start = p.current_start(); + if let Some((range, end_point)) = p.memo.get(&start) { + p.nodes.extend(p.memo_arena[range.clone()].iter().cloned()); + p.restore(end_point.clone()); + return; + } + + let m = p.marker(); + let checkpoint = p.checkpoint(); + + // When we reach a '(', we can't be sure what it is. First, we attempt to + // parse as a simple parenthesized expression, array, or dictionary as + // these are the most likely things. We can handle all of those in a single + // pass. + let kind = parenthesized_or_array_or_dict(p); + + // If, however, '=>' or '=' follows, we must backtrack and reparse as either + // a parameter list or a destructuring. To be able to do that, we created a + // parser checkpoint before our speculative parse, which we can restore. + // + // However, naive backtracking has a fatal flaw: It can lead to exponential + // parsing time if we are constantly getting things wrong in a nested + // scenario. The particular failure case for parameter parsing is the + // following: `(x: (x: (x) => y) => y) => y` + // + // Such a structure will reparse over and over again recursively, leading to + // a running time of O(2^n) for nesting depth n. To prevent this, we perform + // a simple trick: When we have done the mistake of picking the wrong path + // once and have subsequently parsed correctly, we save the result of that + // correct parsing in the `p.memo` map. 
When we reach the same position + // again, we can then just restore this result. In this way, no + // parenthesized expression is parsed more than twice, leading to a worst + // case running time of O(2n). + if p.at(SyntaxKind::Arrow) { + p.restore(checkpoint); + params(p); + p.assert(SyntaxKind::Arrow); + code_expr(p); + p.wrap(m, SyntaxKind::Closure); + } else if p.at(SyntaxKind::Eq) && kind != SyntaxKind::Parenthesized { + p.restore(checkpoint); + destructuring_or_parenthesized(p, true, &mut HashSet::new()); + p.assert(SyntaxKind::Eq); + code_expr(p); + p.wrap(m, SyntaxKind::DestructAssignment); + } else { + return; + } + + // Memoize result if we backtracked. + let offset = p.memo_arena.len(); + p.memo_arena.extend(p.nodes[m.0..].iter().cloned()); + p.memo.insert(start, (offset..p.memo_arena.len(), p.checkpoint())); +} + +/// Parses either +/// - a parenthesized expression: `(1 + 2)`, or +/// - an array: `(1, "hi", 12cm)`, or +/// - a dictionary: `(thickness: 3pt, pattern: dashed)`. +fn parenthesized_or_array_or_dict(p: &mut Parser) -> SyntaxKind { + let m = p.marker(); + p.enter_newline_mode(NewlineMode::Continue); + p.assert(SyntaxKind::LeftParen); + + let mut state = GroupState { + count: 0, + maybe_just_parens: true, + kind: None, + seen: HashSet::new(), + }; + + if p.eat_if(SyntaxKind::Colon) { + state.kind = Some(SyntaxKind::Dict); + state.maybe_just_parens = false; + } + + while !p.current().is_terminator() { + if !p.at_set(set::ARRAY_OR_DICT_ITEM) { + p.unexpected(); + continue; + } + + array_or_dict_item(p, &mut state); + state.count += 1; + + if !p.current().is_terminator() && p.expect(SyntaxKind::Comma) { + state.maybe_just_parens = false; + } + } + + p.expect_closing_delimiter(m, SyntaxKind::RightParen); + p.exit_newline_mode(); + + let kind = if state.maybe_just_parens && state.count == 1 { + SyntaxKind::Parenthesized + } else { + state.kind.unwrap_or(SyntaxKind::Array) + }; + + p.wrap(m, kind); + kind +} + +/// State for array/dictionary parsing. 
+struct GroupState { + count: usize, + maybe_just_parens: bool, + kind: Option<SyntaxKind>, + seen: HashSet<EcoString>, +} + +/// Parses a single item in an array or dictionary. +fn array_or_dict_item(p: &mut Parser, state: &mut GroupState) { + let m = p.marker(); + + if p.eat_if(SyntaxKind::Dots) { + // Parses a spread item: `..item`. + code_expr(p); + p.wrap(m, SyntaxKind::Spread); + state.maybe_just_parens = false; + return; + } + + code_expr(p); + + if p.eat_if(SyntaxKind::Colon) { + // Parses a named/keyed pair: `name: item` or `"key": item`. + code_expr(p); + + let node = &mut p[m]; + let pair_kind = match node.kind() { + SyntaxKind::Ident => SyntaxKind::Named, + _ => SyntaxKind::Keyed, + }; + + if let Some(key) = match node.cast::<ast::Expr>() { + Some(ast::Expr::Ident(ident)) => Some(ident.get().clone()), + Some(ast::Expr::Str(s)) => Some(s.get()), + _ => None, + } { + if !state.seen.insert(key.clone()) { + node.convert_to_error(eco_format!("duplicate key: {key}")); } - _ => {} + } + + p.wrap(m, pair_kind); + state.maybe_just_parens = false; + + if state.kind == Some(SyntaxKind::Array) { + p[m].expected("expression"); + } else { + state.kind = Some(SyntaxKind::Dict); + } + } else { + // Parses a positional item. + if state.kind == Some(SyntaxKind::Dict) { + p[m].expected("named or keyed pair"); + } else { + state.kind = Some(SyntaxKind::Array) } } } -fn validate_array_at(p: &mut Parser, m: Marker) { - validate_array(p.post_process(m)) +/// Parses a function call's argument list: `(12pt, y)`.
+fn args(p: &mut Parser) { + if !p.at(SyntaxKind::LeftParen) && !p.at(SyntaxKind::LeftBracket) { + p.expected("argument list"); + } + + let m = p.marker(); + if p.at(SyntaxKind::LeftParen) { + let m2 = p.marker(); + p.enter_newline_mode(NewlineMode::Continue); + p.assert(SyntaxKind::LeftParen); + + let mut seen = HashSet::new(); + while !p.current().is_terminator() { + if !p.at_set(set::ARG) { + p.unexpected(); + continue; + } + + arg(p, &mut seen); + + if !p.current().is_terminator() { + p.expect(SyntaxKind::Comma); + } + } + + p.expect_closing_delimiter(m2, SyntaxKind::RightParen); + p.exit_newline_mode(); + } + + while p.directly_at(SyntaxKind::LeftBracket) { + content_block(p); + } + + p.wrap(m, SyntaxKind::Args); } -fn validate_array<'a>(children: impl Iterator<Item = &'a mut SyntaxNode>) { - for child in children { - let kind = child.kind(); - match kind { - SyntaxKind::Array => validate_array(child.children_mut().iter_mut()), - SyntaxKind::Dict => validate_dict(child.children_mut().iter_mut()), - SyntaxKind::Named | SyntaxKind::Keyed | SyntaxKind::Underscore => { - child.convert_to_error(eco_format!( - "expected expression, found {}", - kind.name() - )); - } - _ => {} +/// Parses a single argument in an argument list. +fn arg<'s>(p: &mut Parser<'s>, seen: &mut HashSet<&'s str>) { + let m = p.marker(); + if p.eat_if(SyntaxKind::Dots) { + // Parses a spread argument: `..args`. + code_expr(p); + p.wrap(m, SyntaxKind::Spread); + } else if p.at(SyntaxKind::Ident) && p.peek() == SyntaxKind::Colon { + // Parses a named argument: `thickness: 12pt`. + let text = p.current_text(); + p.assert(SyntaxKind::Ident); + if !seen.insert(text) { + p[m].convert_to_error(eco_format!("duplicate argument: {text}")); + } + p.assert(SyntaxKind::Colon); + code_expr(p); + p.wrap(m, SyntaxKind::Named); + } else { + // Parses a normal positional argument. + let at_expr = p.at_set(set::CODE_EXPR); + code_expr(p); + + // Recover from bad named pair.
+ if at_expr && p.eat_if(SyntaxKind::Colon) { + p[m].expected("identifier"); + code_expr(p); } } } -fn validate_dict_at(p: &mut Parser, m: Marker) { - validate_dict(p.post_process(m)) +/// Parses a closure's parameters: `(x, y)`. +fn params(p: &mut Parser) { + let m = p.marker(); + p.enter_newline_mode(NewlineMode::Continue); + p.assert(SyntaxKind::LeftParen); + + let mut seen = HashSet::new(); + let mut sink = false; + + while !p.current().is_terminator() { + if !p.at_set(set::PARAM) { + p.unexpected(); + continue; + } + + param(p, &mut seen, &mut sink); + + if !p.current().is_terminator() { + p.expect(SyntaxKind::Comma); + } + } + + p.expect_closing_delimiter(m, SyntaxKind::RightParen); + p.exit_newline_mode(); + p.wrap(m, SyntaxKind::Params); } -fn validate_dict<'a>(children: impl Iterator) { - let mut used = HashSet::new(); - for child in children { - match child.kind() { - SyntaxKind::Named | SyntaxKind::Keyed => { - let Some(first) = child.children_mut().first_mut() else { continue }; - let key = if let Some(str) = first.cast::() { - str.get() - } else if let Some(ident) = first.cast::() { - ident.get().clone() - } else { - continue; - }; +/// Parses a single parameter in a parameter list. +fn param<'s>(p: &mut Parser<'s>, seen: &mut HashSet<&'s str>, sink: &mut bool) { + let m = p.marker(); + if p.eat_if(SyntaxKind::Dots) { + // Parses argument sink: `..sink`. + if p.at_set(set::PATTERN_LEAF) { + pattern_leaf(p, false, seen, Some("parameter")); + } + p.wrap(m, SyntaxKind::Spread); + if mem::replace(sink, true) { + p[m].convert_to_error("only one argument sink is allowed"); + } + } else if p.at(SyntaxKind::Ident) && p.peek() == SyntaxKind::Colon { + // Parses named parameter: `thickness: 3pt`. + // We still use `pattern` even though we know it's just an identifier + // because it gives us duplicate parameter detection for free. 
+ pattern(p, false, seen, Some("parameter")); + p.assert(SyntaxKind::Colon); + code_expr(p); + p.wrap(m, SyntaxKind::Named); + } else { + // Parses a normal position parameter. + let at_pat = p.at_set(set::PATTERN); + pattern(p, false, seen, Some("parameter")); - if !used.insert(key.clone()) { - first.convert_to_error(eco_format!("duplicate key: {key}")); - child.make_erroneous(); - } - } - SyntaxKind::Spread => {} - SyntaxKind::LeftParen - | SyntaxKind::RightParen - | SyntaxKind::Comma - | SyntaxKind::Colon - | SyntaxKind::Space => {} - kind => { - child.convert_to_error(eco_format!( - "expected named or keyed pair, found {}", - kind.name() - )); - } + // Recover from bad named pair. + if at_pat && p.eat_if(SyntaxKind::Colon) { + p[m].expected("identifier"); + code_expr(p); } } } -fn validate_params_at(p: &mut Parser, m: Marker) { - let mut used_spread = false; - let mut used = HashSet::new(); - for child in p.post_process(m) { - match child.kind() { - SyntaxKind::Ident => { - if !used.insert(child.text().clone()) { - child.convert_to_error(eco_format!( - "duplicate parameter: {}", - child.text() - )); - } - } - SyntaxKind::Named => { - let Some(within) = child.children_mut().first_mut() else { return }; - if !used.insert(within.text().clone()) { - within.convert_to_error(eco_format!( - "duplicate parameter: {}", - within.text() - )); - child.make_erroneous(); - } - } - SyntaxKind::Spread => { - let Some(within) = child.children_mut().last_mut() else { continue }; - if used_spread { - child.convert_to_error("only one argument sink is allowed"); - continue; - } - used_spread = true; - if within.kind() == SyntaxKind::Dots { - continue; - } else if within.kind() != SyntaxKind::Ident { - within.convert_to_error(eco_format!( - "expected identifier, found {}", - within.kind().name(), - )); - child.make_erroneous(); - continue; - } - if !used.insert(within.text().clone()) { - within.convert_to_error(eco_format!( - "duplicate parameter: {}", - within.text() - )); - 
child.make_erroneous(); - } - } - SyntaxKind::Array | SyntaxKind::Dict | SyntaxKind::Destructuring => { - validate_pattern(child.children_mut().iter_mut(), &mut used, false); - child.convert_to_kind(SyntaxKind::Destructuring); - } - SyntaxKind::LeftParen - | SyntaxKind::RightParen - | SyntaxKind::Comma - | SyntaxKind::Underscore => {} - kind => { - child.convert_to_error(eco_format!( - "expected identifier, named pair or argument sink, found {}", - kind.name() - )); - } - } - } -} - -fn validate_args_at(p: &mut Parser, m: Marker) { - let mut used = HashSet::new(); - for child in p.post_process(m) { - if child.kind() == SyntaxKind::Named { - let Some(within) = child.children_mut().first_mut() else { return }; - if !used.insert(within.text().clone()) { - within.convert_to_error(eco_format!( - "duplicate argument: {}", - within.text() - )); - child.make_erroneous(); - } - } else if child.kind() == SyntaxKind::Underscore { - child.convert_to_error("unexpected underscore"); - } - } -} - -fn validate_pattern_at(p: &mut Parser, m: Marker, forbid_expressions: bool) { - let mut used = HashSet::new(); - validate_pattern(p.post_process(m), &mut used, forbid_expressions); -} - -fn validate_pattern<'a>( - children: impl Iterator, - used: &mut HashSet, - forbid_expressions: bool, +/// Parses a binding or reassignment pattern. 
+fn pattern<'s>( + p: &mut Parser<'s>, + reassignment: bool, + seen: &mut HashSet<&'s str>, + dupe: Option<&'s str>, ) { - let mut used_spread = false; - for child in children { - match child.kind() { - SyntaxKind::Ident => { - if !used.insert(child.text().clone()) { - child.convert_to_error( - "at most one binding per identifier is allowed", - ); - } - } - SyntaxKind::Spread => { - let Some(within) = child.children_mut().last_mut() else { continue }; - if used_spread { - child.convert_to_error("at most one destructuring sink is allowed"); - continue; - } - used_spread = true; + match p.current() { + SyntaxKind::Underscore => p.eat(), + SyntaxKind::LeftParen => destructuring_or_parenthesized(p, reassignment, seen), + _ => pattern_leaf(p, reassignment, seen, dupe), + } +} - if within.kind() == SyntaxKind::Dots { - continue; - } else if forbid_expressions && within.kind() != SyntaxKind::Ident { - within.convert_to_error(eco_format!( - "expected identifier, found {}", - within.kind().name(), - )); - child.make_erroneous(); - continue; - } +/// Parses a destructuring pattern or just a parenthesized pattern. 
+fn destructuring_or_parenthesized<'s>( + p: &mut Parser<'s>, + reassignment: bool, + seen: &mut HashSet<&'s str>, +) { + let mut sink = false; + let mut count = 0; + let mut maybe_just_parens = true; - if !used.insert(within.text().clone()) { - within.convert_to_error( - "at most one binding per identifier is allowed", - ); - child.make_erroneous(); - } - } - SyntaxKind::Named => { - let Some(within) = child.children_mut().first_mut() else { return }; - if !used.insert(within.text().clone()) { - within.convert_to_error( - "at most one binding per identifier is allowed", - ); - child.make_erroneous(); - } + let m = p.marker(); + p.enter_newline_mode(NewlineMode::Continue); + p.assert(SyntaxKind::LeftParen); - if forbid_expressions { - let Some(within) = child.children_mut().last_mut() else { return }; - if within.kind() != SyntaxKind::Ident - && within.kind() != SyntaxKind::Underscore - { - within.convert_to_error(eco_format!( - "expected identifier, found {}", - within.kind().name(), - )); - child.make_erroneous(); - } - } - } - SyntaxKind::LeftParen - | SyntaxKind::RightParen - | SyntaxKind::Comma - | SyntaxKind::Underscore => {} - kind => { - if forbid_expressions { - child.convert_to_error(eco_format!( - "expected identifier or destructuring sink, found {}", - kind.name() - )); - } + while !p.current().is_terminator() { + if !p.at_set(set::DESTRUCTURING_ITEM) { + p.unexpected(); + continue; + } + + destructuring_item(p, reassignment, seen, &mut maybe_just_parens, &mut sink); + count += 1; + + if !p.current().is_terminator() && p.expect(SyntaxKind::Comma) { + maybe_just_parens = false; + } + } + + p.expect_closing_delimiter(m, SyntaxKind::RightParen); + p.exit_newline_mode(); + + if maybe_just_parens && count == 1 && !sink { + p.wrap(m, SyntaxKind::Parenthesized); + } else { + p.wrap(m, SyntaxKind::Destructuring); + } +} + +/// Parses an item in a destructuring pattern. 
+fn destructuring_item<'s>( + p: &mut Parser<'s>, + reassignment: bool, + seen: &mut HashSet<&'s str>, + maybe_just_parens: &mut bool, + sink: &mut bool, +) { + let m = p.marker(); + if p.eat_if(SyntaxKind::Dots) { + // Parse destructuring sink: `..rest`. + if p.at_set(set::PATTERN_LEAF) { + pattern_leaf(p, reassignment, seen, None); + } + p.wrap(m, SyntaxKind::Spread); + if mem::replace(sink, true) { + p[m].convert_to_error("only one destructuring sink is allowed"); + } + } else if p.at(SyntaxKind::Ident) && p.peek() == SyntaxKind::Colon { + // Parse named destructuring item. + p.assert(SyntaxKind::Ident); + p.assert(SyntaxKind::Colon); + pattern(p, reassignment, seen, None); + p.wrap(m, SyntaxKind::Named); + *maybe_just_parens = false; + } else { + // Parse positional destructuring item. + let at_pat = p.at_set(set::PATTERN); + pattern(p, reassignment, seen, None); + + // Recover from bad named destructuring. + if at_pat && p.eat_if(SyntaxKind::Colon) { + p[m].expected("identifier"); + pattern(p, reassignment, seen, None); + } + } +} + +/// Parses a leaf in a pattern - either an identifier or an expression +/// depending on whether it's a binding or reassignment pattern. +fn pattern_leaf<'s>( + p: &mut Parser<'s>, + reassignment: bool, + seen: &mut HashSet<&'s str>, + dupe: Option<&'s str>, +) { + if !p.at_set(set::PATTERN_LEAF) { + if p.current().is_keyword() { + p.eat_and_get().expected("pattern"); + } else { + p.expected("pattern"); + } + return; + } + + let m = p.marker(); + let text = p.current_text(); + + // We parse an atomic expression even though we only want an identifier for + // better error recovery. We can mark the whole expression as unexpected + // instead of going through its pieces one by one. 
+ code_expr_prec(p, true, 0); + + if !reassignment { + let node = &mut p[m]; + if node.kind() == SyntaxKind::Ident { + if !seen.insert(text) { + node.convert_to_error(eco_format!( + "duplicate {}: {text}", + dupe.unwrap_or("binding"), + )); } + } else { + node.expected("pattern"); } } } @@ -1484,13 +1440,16 @@ struct Parser<'s> { prev_end: usize, current_start: usize, current: SyntaxKind, - modes: Vec, - nodes: Vec, - newline_modes: Vec, balanced: bool, + nodes: Vec, + modes: Vec, + newline_modes: Vec, + memo: HashMap, Checkpoint<'s>)>, + memo_arena: Vec, } /// How to proceed with parsing when seeing a newline. +#[derive(Clone)] enum NewlineMode { /// Stop always. Stop, @@ -1503,6 +1462,15 @@ enum NewlineMode { #[derive(Debug, Copy, Clone, Eq, PartialEq)] struct Marker(usize); +#[derive(Clone)] +struct Checkpoint<'s> { + lexer: Lexer<'s>, + prev_end: usize, + current_start: usize, + current: SyntaxKind, + nodes: usize, +} + impl<'s> Parser<'s> { fn new(text: &'s str, offset: usize, mode: LexMode) -> Self { let mut lexer = Lexer::new(text, mode); @@ -1514,10 +1482,12 @@ impl<'s> Parser<'s> { prev_end: offset, current_start: offset, current, - modes: vec![], - nodes: vec![], - newline_modes: vec![], balanced: true, + nodes: vec![], + modes: vec![], + newline_modes: vec![], + memo: HashMap::new(), + memo_arena: vec![], } } @@ -1553,10 +1523,8 @@ impl<'s> Parser<'s> { set.contains(self.current) } - #[track_caller] - fn assert(&mut self, kind: SyntaxKind) { - assert_eq!(self.current, kind); - self.eat(); + fn peek(&self) -> SyntaxKind { + self.lexer.clone().next() } fn eof(&self) -> bool { @@ -1567,6 +1535,20 @@ impl<'s> Parser<'s> { self.current == kind && self.prev_end == self.current_start } + fn eat(&mut self) { + self.save(); + self.lex(); + self.skip(); + } + + fn eat_and_get(&mut self) -> &mut SyntaxNode { + let offset = self.nodes.len(); + self.save(); + self.lex(); + self.skip(); + &mut self.nodes[offset] + } + /// Eats if at `kind`. 
/// /// Note: In math and code mode, this will ignore trivia in front of the @@ -1588,6 +1570,12 @@ impl<'s> Parser<'s> { at } + #[track_caller] + fn assert(&mut self, kind: SyntaxKind) { + assert_eq!(self.current, kind); + self.eat(); + } + fn convert(&mut self, kind: SyntaxKind) { self.current = kind; self.eat(); @@ -1605,12 +1593,21 @@ impl<'s> Parser<'s> { Marker(self.nodes.len()) } - fn node(&self, m: Marker) -> Option<&SyntaxNode> { - self.nodes.get(m.0) + /// Get a marker after the last non-trivia node. + fn before_trivia(&self) -> Marker { + let mut i = self.nodes.len(); + if self.lexer.mode() != LexMode::Markup && self.prev_end != self.current_start { + while i > 0 && self.nodes[i - 1].kind().is_trivia() { + i -= 1; + } + } + Marker(i) } - fn node_mut(&mut self, m: Marker) -> Option<&mut SyntaxNode> { - self.nodes.get_mut(m.0) + /// Whether the last non-trivia node is an error. + fn after_error(&mut self) -> bool { + let m = self.before_trivia(); + m.0 > 0 && self.nodes[m.0 - 1].kind().is_error() } fn post_process(&mut self, m: Marker) -> impl Iterator { @@ -1635,10 +1632,6 @@ impl<'s> Parser<'s> { self.nodes.insert(from, SyntaxNode::inner(kind, children)); } - fn progress(&self, offset: usize) -> bool { - offset < self.prev_end - } - fn enter(&mut self, mode: LexMode) { self.modes.push(self.lexer.mode()); self.lexer.set_mode(mode); @@ -1667,10 +1660,22 @@ impl<'s> Parser<'s> { self.skip(); } - fn eat(&mut self) { - self.save(); - self.lex(); - self.skip(); + fn checkpoint(&self) -> Checkpoint<'s> { + Checkpoint { + lexer: self.lexer.clone(), + prev_end: self.prev_end, + current_start: self.current_start, + current: self.current, + nodes: self.nodes.len(), + } + } + + fn restore(&mut self, checkpoint: Checkpoint<'s>) { + self.lexer = checkpoint.lexer; + self.prev_end = checkpoint.prev_end; + self.current_start = checkpoint.current_start; + self.current = checkpoint.current; + self.nodes.truncate(checkpoint.nodes); } fn skip(&mut self) { @@ -1734,14 +1739,8 
@@ impl<'s> Parser<'s> { if at { self.eat(); } else if kind == SyntaxKind::Ident && self.current.is_keyword() { - let found_text = self.current_text(); - let found = self.current.name(); - self.expected_found(kind.name(), found); - self.hint(eco_format!( - "{} is not allowed as an identifier; try `{}_` instead", - found, - found_text - )); + self.trim_errors(); + self.eat_and_get().expected(kind.name()); } else { self.balanced &= !kind.is_grouping(); self.expected(kind.name()); @@ -1749,6 +1748,14 @@ impl<'s> Parser<'s> { at } + /// Consume the given closing delimiter or produce an error for the matching + /// opening delimiter at `open`. + fn expect_closing_delimiter(&mut self, open: Marker, kind: SyntaxKind) { + if !self.eat_if(kind) { + self.nodes[open.0].convert_to_error("unclosed delimiter"); + } + } + /// Produce an error that the given `thing` was expected. fn expected(&mut self, thing: &str) { if !self.after_error() { @@ -1756,70 +1763,19 @@ impl<'s> Parser<'s> { } } - /// Produce an error that the given `thing` was expected but another - /// thing was `found` and consume the next token. - fn expected_found(&mut self, thing: &str, found: &str) { - self.trim_errors(); - self.convert_to_error(eco_format!("expected {thing}, found {found}")); - } - /// Produce an error that the given `thing` was expected at the position /// of the marker `m`. fn expected_at(&mut self, m: Marker, thing: &str) { - let message = eco_format!("expected {}", thing); - let error = SyntaxNode::error(message, ""); + let error = SyntaxNode::error(eco_format!("expected {thing}"), ""); self.nodes.insert(m.0, error); } - /// Produce an error for the unclosed delimiter `kind` at the position - /// `open`. - fn expect_closing_delimiter(&mut self, open: Marker, kind: SyntaxKind) { - if !self.eat_if(kind) { - self.nodes[open.0].convert_to_error("unclosed delimiter"); - } - } - /// Consume the next token (if any) and produce an error stating that it was /// unexpected. 
fn unexpected(&mut self) { self.trim_errors(); - self.convert_to_error(eco_format!("unexpected {}", self.current.name())); - } - - /// Consume the next token and turn it into an error. - fn convert_to_error(&mut self, message: EcoString) { - let kind = self.current; - let offset = self.nodes.len(); - self.eat(); - self.balanced &= !kind.is_grouping(); - if !kind.is_error() { - self.nodes[offset].convert_to_error(message); - } - } - - /// Adds a hint to the last node, if the last node is an error. - fn hint(&mut self, hint: impl Into) { - let m = self.before_trivia(); - if m.0 > 0 { - self.nodes[m.0 - 1].hint(hint); - } - } - - /// Get a marker after the last non-trivia node. - fn before_trivia(&self) -> Marker { - let mut i = self.nodes.len(); - if self.lexer.mode() != LexMode::Markup && self.prev_end != self.current_start { - while i > 0 && self.nodes[i - 1].kind().is_trivia() { - i -= 1; - } - } - Marker(i) - } - - /// Whether the last non-trivia node is an error. - fn after_error(&mut self) -> bool { - let m = self.before_trivia(); - m.0 > 0 && self.nodes[m.0 - 1].kind().is_error() + self.balanced &= !self.current.is_grouping(); + self.eat_and_get().unexpected(); } /// Remove trailing errors with zero length. @@ -1835,3 +1791,17 @@ impl<'s> Parser<'s> { self.nodes.drain(start..end); } } + +impl Index for Parser<'_> { + type Output = SyntaxNode; + + fn index(&self, m: Marker) -> &Self::Output { + &self.nodes[m.0] + } +} + +impl IndexMut for Parser<'_> { + fn index_mut(&mut self, m: Marker) -> &mut Self::Output { + &mut self.nodes[m.0] + } +} diff --git a/crates/typst-syntax/src/set.rs b/crates/typst-syntax/src/set.rs index 26b4ecd53..88a9b18b8 100644 --- a/crates/typst-syntax/src/set.rs +++ b/crates/typst-syntax/src/set.rs @@ -10,17 +10,16 @@ pub struct SyntaxSet(u128); impl SyntaxSet { /// Create a new set from a slice of kinds. 
- pub const fn new(slice: &[SyntaxKind]) -> Self { - let mut bits = 0; - let mut i = 0; - while i < slice.len() { - bits |= bit(slice[i]); - i += 1; - } - Self(bits) + pub const fn new() -> Self { + Self(0) } /// Insert a syntax kind into the set. + pub const fn add(self, kind: SyntaxKind) -> Self { + Self(self.0 | bit(kind)) + } + + /// Combine two syntax sets. pub const fn union(self, other: Self) -> Self { Self(self.0 | other.0) } @@ -36,56 +35,53 @@ const fn bit(kind: SyntaxKind) -> u128 { } /// Syntax kinds that can start a statement. -pub const STMT: SyntaxSet = SyntaxSet::new(&[ - SyntaxKind::Let, - SyntaxKind::Set, - SyntaxKind::Show, - SyntaxKind::Import, - SyntaxKind::Include, - SyntaxKind::Return, -]); +pub const STMT: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Let) + .add(SyntaxKind::Set) + .add(SyntaxKind::Show) + .add(SyntaxKind::Import) + .add(SyntaxKind::Include) + .add(SyntaxKind::Return); /// Syntax kinds that can start a markup expression. -pub const MARKUP_EXPR: SyntaxSet = SyntaxSet::new(&[ - SyntaxKind::Space, - SyntaxKind::Parbreak, - SyntaxKind::LineComment, - SyntaxKind::BlockComment, - SyntaxKind::Text, - SyntaxKind::Linebreak, - SyntaxKind::Escape, - SyntaxKind::Shorthand, - SyntaxKind::SmartQuote, - SyntaxKind::Raw, - SyntaxKind::Link, - SyntaxKind::Label, - SyntaxKind::Hash, - SyntaxKind::Star, - SyntaxKind::Underscore, - SyntaxKind::HeadingMarker, - SyntaxKind::ListMarker, - SyntaxKind::EnumMarker, - SyntaxKind::TermMarker, - SyntaxKind::RefMarker, - SyntaxKind::Dollar, - SyntaxKind::LeftBracket, - SyntaxKind::RightBracket, - SyntaxKind::Colon, -]); +pub const MARKUP_EXPR: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Space) + .add(SyntaxKind::Parbreak) + .add(SyntaxKind::LineComment) + .add(SyntaxKind::BlockComment) + .add(SyntaxKind::Text) + .add(SyntaxKind::Linebreak) + .add(SyntaxKind::Escape) + .add(SyntaxKind::Shorthand) + .add(SyntaxKind::SmartQuote) + .add(SyntaxKind::Raw) + .add(SyntaxKind::Link) + 
.add(SyntaxKind::Label) + .add(SyntaxKind::Hash) + .add(SyntaxKind::Star) + .add(SyntaxKind::Underscore) + .add(SyntaxKind::HeadingMarker) + .add(SyntaxKind::ListMarker) + .add(SyntaxKind::EnumMarker) + .add(SyntaxKind::TermMarker) + .add(SyntaxKind::RefMarker) + .add(SyntaxKind::Dollar) + .add(SyntaxKind::LeftBracket) + .add(SyntaxKind::RightBracket) + .add(SyntaxKind::Colon); /// Syntax kinds that can start a math expression. -pub const MATH_EXPR: SyntaxSet = SyntaxSet::new(&[ - SyntaxKind::Hash, - SyntaxKind::MathIdent, - SyntaxKind::Text, - SyntaxKind::Shorthand, - SyntaxKind::Linebreak, - SyntaxKind::MathAlignPoint, - SyntaxKind::Escape, - SyntaxKind::Str, - SyntaxKind::Root, - SyntaxKind::Prime, -]); +pub const MATH_EXPR: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Hash) + .add(SyntaxKind::MathIdent) + .add(SyntaxKind::Text) + .add(SyntaxKind::Shorthand) + .add(SyntaxKind::Linebreak) + .add(SyntaxKind::MathAlignPoint) + .add(SyntaxKind::Escape) + .add(SyntaxKind::Str) + .add(SyntaxKind::Root) + .add(SyntaxKind::Prime); /// Syntax kinds that can start a code expression. pub const CODE_EXPR: SyntaxSet = CODE_PRIMARY.union(UNARY_OP); @@ -94,63 +90,81 @@ pub const CODE_EXPR: SyntaxSet = CODE_PRIMARY.union(UNARY_OP); pub const ATOMIC_CODE_EXPR: SyntaxSet = ATOMIC_CODE_PRIMARY; /// Syntax kinds that can start a code primary. -pub const CODE_PRIMARY: SyntaxSet = - ATOMIC_CODE_PRIMARY.union(SyntaxSet::new(&[SyntaxKind::Underscore])); +pub const CODE_PRIMARY: SyntaxSet = ATOMIC_CODE_PRIMARY.add(SyntaxKind::Underscore); /// Syntax kinds that can start an atomic code primary. 
-pub const ATOMIC_CODE_PRIMARY: SyntaxSet = SyntaxSet::new(&[ - SyntaxKind::Ident, - SyntaxKind::LeftBrace, - SyntaxKind::LeftBracket, - SyntaxKind::LeftParen, - SyntaxKind::Dollar, - SyntaxKind::Let, - SyntaxKind::Set, - SyntaxKind::Show, - SyntaxKind::If, - SyntaxKind::While, - SyntaxKind::For, - SyntaxKind::Import, - SyntaxKind::Include, - SyntaxKind::Break, - SyntaxKind::Continue, - SyntaxKind::Return, - SyntaxKind::None, - SyntaxKind::Auto, - SyntaxKind::Int, - SyntaxKind::Float, - SyntaxKind::Bool, - SyntaxKind::Numeric, - SyntaxKind::Str, - SyntaxKind::Label, - SyntaxKind::Raw, -]); +pub const ATOMIC_CODE_PRIMARY: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Ident) + .add(SyntaxKind::LeftBrace) + .add(SyntaxKind::LeftBracket) + .add(SyntaxKind::LeftParen) + .add(SyntaxKind::Dollar) + .add(SyntaxKind::Let) + .add(SyntaxKind::Set) + .add(SyntaxKind::Show) + .add(SyntaxKind::If) + .add(SyntaxKind::While) + .add(SyntaxKind::For) + .add(SyntaxKind::Import) + .add(SyntaxKind::Include) + .add(SyntaxKind::Break) + .add(SyntaxKind::Continue) + .add(SyntaxKind::Return) + .add(SyntaxKind::None) + .add(SyntaxKind::Auto) + .add(SyntaxKind::Int) + .add(SyntaxKind::Float) + .add(SyntaxKind::Bool) + .add(SyntaxKind::Numeric) + .add(SyntaxKind::Str) + .add(SyntaxKind::Label) + .add(SyntaxKind::Raw); /// Syntax kinds that are unary operators. -pub const UNARY_OP: SyntaxSet = - SyntaxSet::new(&[SyntaxKind::Plus, SyntaxKind::Minus, SyntaxKind::Not]); +pub const UNARY_OP: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Plus) + .add(SyntaxKind::Minus) + .add(SyntaxKind::Not); /// Syntax kinds that are binary operators. 
-pub const BINARY_OP: SyntaxSet = SyntaxSet::new(&[ - SyntaxKind::Plus, - SyntaxKind::Minus, - SyntaxKind::Star, - SyntaxKind::Slash, - SyntaxKind::And, - SyntaxKind::Or, - SyntaxKind::EqEq, - SyntaxKind::ExclEq, - SyntaxKind::Lt, - SyntaxKind::LtEq, - SyntaxKind::Gt, - SyntaxKind::GtEq, - SyntaxKind::Eq, - SyntaxKind::In, - SyntaxKind::PlusEq, - SyntaxKind::HyphEq, - SyntaxKind::StarEq, - SyntaxKind::SlashEq, -]); +pub const BINARY_OP: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Plus) + .add(SyntaxKind::Minus) + .add(SyntaxKind::Star) + .add(SyntaxKind::Slash) + .add(SyntaxKind::And) + .add(SyntaxKind::Or) + .add(SyntaxKind::EqEq) + .add(SyntaxKind::ExclEq) + .add(SyntaxKind::Lt) + .add(SyntaxKind::LtEq) + .add(SyntaxKind::Gt) + .add(SyntaxKind::GtEq) + .add(SyntaxKind::Eq) + .add(SyntaxKind::In) + .add(SyntaxKind::PlusEq) + .add(SyntaxKind::HyphEq) + .add(SyntaxKind::StarEq) + .add(SyntaxKind::SlashEq); + +/// Syntax kinds that can start an array or dictionary item. +pub const ARRAY_OR_DICT_ITEM: SyntaxSet = CODE_EXPR.add(SyntaxKind::Dots); + +/// Syntax kinds that can start an argument in a function call. +pub const ARG: SyntaxSet = CODE_EXPR.add(SyntaxKind::Dots); + +/// Syntax kinds that can start a parameter in a parameter list. +pub const PARAM: SyntaxSet = PATTERN.add(SyntaxKind::Dots); + +/// Syntax kinds that can start a destructuring item. +pub const DESTRUCTURING_ITEM: SyntaxSet = PATTERN.add(SyntaxKind::Dots); + +/// Syntax kinds that can start a pattern. +pub const PATTERN: SyntaxSet = + PATTERN_LEAF.add(SyntaxKind::LeftParen).add(SyntaxKind::Underscore); + +/// Syntax kinds that can start a pattern leaf.
+pub const PATTERN_LEAF: SyntaxSet = ATOMIC_CODE_EXPR; #[cfg(test)] mod tests { @@ -163,7 +177,7 @@ mod tests { #[test] fn test_set() { - let set = SyntaxSet::new(&[SyntaxKind::And, SyntaxKind::Or]); + let set = SyntaxSet::new().add(SyntaxKind::And).add(SyntaxKind::Or); assert!(set.contains(SyntaxKind::And)); assert!(set.contains(SyntaxKind::Or)); assert!(!set.contains(SyntaxKind::Not)); diff --git a/crates/typst/src/eval/binding.rs b/crates/typst/src/eval/binding.rs index abb2f4bcd..8a2d91eef 100644 --- a/crates/typst/src/eval/binding.rs +++ b/crates/typst/src/eval/binding.rs @@ -31,7 +31,7 @@ impl Eval for ast::DestructAssignment<'_> { fn eval(self, vm: &mut Vm) -> SourceResult { let value = self.value().eval(vm)?; - destructure_impl(vm, self.pattern(), value, |vm, expr, value| { + destructure_impl(vm, self.pattern(), value, &mut |vm, expr, value| { let location = expr.access(vm)?; *location = value; Ok(()) @@ -46,33 +46,34 @@ pub(crate) fn destructure( pattern: ast::Pattern, value: Value, ) -> SourceResult<()> { - destructure_impl(vm, pattern, value, |vm, expr, value| match expr { + destructure_impl(vm, pattern, value, &mut |vm, expr, value| match expr { ast::Expr::Ident(ident) => { vm.define(ident, value); Ok(()) } - _ => bail!(expr.span(), "nested patterns are currently not supported"), + _ => bail!(expr.span(), "cannot assign to this expression"), }) } /// Destruct the given value into the pattern and apply the function to each binding. -fn destructure_impl( +fn destructure_impl( vm: &mut Vm, pattern: ast::Pattern, value: Value, - f: T, + f: &mut F, ) -> SourceResult<()> where - T: Fn(&mut Vm, ast::Expr, Value) -> SourceResult<()>, + F: Fn(&mut Vm, ast::Expr, Value) -> SourceResult<()>, { match pattern { - ast::Pattern::Normal(expr) => { - f(vm, expr, value)?; - } + ast::Pattern::Normal(expr) => f(vm, expr, value)?, ast::Pattern::Placeholder(_) => {} + ast::Pattern::Parenthesized(parenthesized) => { + destructure_impl(vm, parenthesized.pattern(), value, f)? 
+ } ast::Pattern::Destructuring(destruct) => match value { - Value::Array(value) => destructure_array(vm, pattern, value, f, destruct)?, - Value::Dict(value) => destructure_dict(vm, value, f, destruct)?, + Value::Array(value) => destructure_array(vm, destruct, value, f)?, + Value::Dict(value) => destructure_dict(vm, destruct, value, f)?, _ => bail!(pattern.span(), "cannot destructure {}", value.ty()), }, } @@ -81,51 +82,44 @@ where fn destructure_array( vm: &mut Vm, - pattern: ast::Pattern, - value: Array, - f: F, destruct: ast::Destructuring, + value: Array, + f: &mut F, ) -> SourceResult<()> where F: Fn(&mut Vm, ast::Expr, Value) -> SourceResult<()>, { - let mut i = 0; let len = value.as_slice().len(); - for p in destruct.bindings() { + let mut i = 0; + + for p in destruct.items() { match p { - ast::DestructuringKind::Normal(expr) => { + ast::DestructuringItem::Pattern(pattern) => { let Ok(v) = value.at(i as i64, None) else { - bail!(expr.span(), "not enough elements to destructure"); + bail!(pattern.span(), "not enough elements to destructure"); }; - f(vm, expr, v)?; + destructure_impl(vm, pattern, v, f)?; i += 1; } - ast::DestructuringKind::Sink(spread) => { - let sink_size = (1 + len).checked_sub(destruct.bindings().count()); + ast::DestructuringItem::Spread(spread) => { + let sink_size = (1 + len).checked_sub(destruct.items().count()); let sink = sink_size.and_then(|s| value.as_slice().get(i..i + s)); - if let (Some(sink_size), Some(sink)) = (sink_size, sink) { - if let Some(expr) = spread.expr() { - f(vm, expr, Value::Array(sink.into()))?; - } - i += sink_size; - } else { - bail!(pattern.span(), "not enough elements to destructure") + let (Some(sink_size), Some(sink)) = (sink_size, sink) else { + bail!(spread.span(), "not enough elements to destructure"); + }; + if let Some(expr) = spread.sink_expr() { + f(vm, expr, Value::Array(sink.into()))?; } + i += sink_size; } - ast::DestructuringKind::Named(named) => { - bail!(named.span(), "cannot destructure named 
elements from an array") - } - ast::DestructuringKind::Placeholder(underscore) => { - if i < len { - i += 1 - } else { - bail!(underscore.span(), "not enough elements to destructure") - } + ast::DestructuringItem::Named(named) => { + bail!(named.span(), "cannot destructure named pattern from an array") } } } + if i < len { - bail!(pattern.span(), "too many elements to destructure"); + bail!(destruct.span(), "too many elements to destructure"); } Ok(()) @@ -133,32 +127,35 @@ where fn destructure_dict( vm: &mut Vm, - dict: Dict, - f: F, destruct: ast::Destructuring, + dict: Dict, + f: &mut F, ) -> SourceResult<()> where F: Fn(&mut Vm, ast::Expr, Value) -> SourceResult<()>, { let mut sink = None; let mut used = HashSet::new(); - for p in destruct.bindings() { + + for p in destruct.items() { match p { - ast::DestructuringKind::Normal(ast::Expr::Ident(ident)) => { + // Shorthand for a direct identifier. + ast::DestructuringItem::Pattern(ast::Pattern::Normal(ast::Expr::Ident( + ident, + ))) => { let v = dict.get(&ident).at(ident.span())?; f(vm, ast::Expr::Ident(ident), v.clone())?; - used.insert(ident.as_str()); + used.insert(ident.get().clone()); } - ast::DestructuringKind::Sink(spread) => sink = spread.expr(), - ast::DestructuringKind::Named(named) => { + ast::DestructuringItem::Named(named) => { let name = named.name(); let v = dict.get(&name).at(name.span())?; - f(vm, named.expr(), v.clone())?; - used.insert(name.as_str()); + destructure_impl(vm, named.pattern(), v.clone(), f)?; + used.insert(name.get().clone()); } - ast::DestructuringKind::Placeholder(_) => {} - ast::DestructuringKind::Normal(expr) => { - bail!(expr.span(), "expected key, found expression"); + ast::DestructuringItem::Spread(spread) => sink = spread.sink_expr(), + ast::DestructuringItem::Pattern(expr) => { + bail!(expr.span(), "cannot destructure unnamed pattern from dictionary"); } } } diff --git a/crates/typst/src/eval/call.rs b/crates/typst/src/eval/call.rs index c1249c16b..3717903c3 100644 --- 
a/crates/typst/src/eval/call.rs +++ b/crates/typst/src/eval/call.rs @@ -192,13 +192,14 @@ impl Eval for ast::Args<'_> { }); } ast::Arg::Named(named) => { + let expr = named.expr(); items.push(Arg { span, name: Some(named.name().get().clone().into()), - value: Spanned::new(named.expr().eval(vm)?, named.expr().span()), + value: Spanned::new(expr.eval(vm)?, expr.span()), }); } - ast::Arg::Spread(expr) => match expr.eval(vm)? { + ast::Arg::Spread(spread) => match spread.expr().eval(vm)? { Value::None => {} Value::Array(array) => { items.extend(array.into_iter().map(|value| Arg { @@ -215,7 +216,7 @@ impl Eval for ast::Args<'_> { })); } Value::Args(args) => items.extend(args.items), - v => bail!(expr.span(), "cannot spread {}", v.ty()), + v => bail!(spread.span(), "cannot spread {}", v.ty()), }, } } @@ -311,7 +312,6 @@ pub(crate) fn call_closure( ast::Pattern::Normal(ast::Expr::Ident(ident)) => { vm.define(ident, args.expect::(&ident)?) } - ast::Pattern::Normal(_) => unreachable!(), pattern => { crate::eval::destructure( &mut vm, @@ -320,8 +320,8 @@ pub(crate) fn call_closure( )?; } }, - ast::Param::Sink(ident) => { - sink = Some(ident.name()); + ast::Param::Spread(spread) => { + sink = Some(spread.sink_ident()); if let Some(sink_size) = sink_size { sink_pos_values = Some(args.consume(sink_size)?); } @@ -336,10 +336,10 @@ pub(crate) fn call_closure( } } - if let Some(sink_name) = sink { + if let Some(sink) = sink { // Remaining args are captured regardless of whether the sink is named. 
let mut remaining_args = args.take(); - if let Some(sink_name) = sink_name { + if let Some(sink_name) = sink { if let Some(sink_pos_values) = sink_pos_values { remaining_args.items.extend(sink_pos_values); } @@ -436,13 +436,15 @@ impl<'a> CapturesVisitor<'a> { for param in expr.params().children() { match param { ast::Param::Pos(pattern) => { - for ident in pattern.idents() { + for ident in pattern.bindings() { self.bind(ident); } } ast::Param::Named(named) => self.bind(named.name()), - ast::Param::Sink(spread) => { - self.bind(spread.name().unwrap_or_default()) + ast::Param::Spread(spread) => { + if let Some(ident) = spread.sink_ident() { + self.bind(ident); + } } } } @@ -458,7 +460,7 @@ impl<'a> CapturesVisitor<'a> { self.visit(init.to_untyped()); } - for ident in expr.kind().idents() { + for ident in expr.kind().bindings() { self.bind(ident); } } @@ -471,7 +473,7 @@ impl<'a> CapturesVisitor<'a> { self.internal.enter(); let pattern = expr.pattern(); - for ident in pattern.idents() { + for ident in pattern.bindings() { self.bind(ident); } diff --git a/crates/typst/src/eval/code.rs b/crates/typst/src/eval/code.rs index d41b593c2..e93086255 100644 --- a/crates/typst/src/eval/code.rs +++ b/crates/typst/src/eval/code.rs @@ -210,10 +210,10 @@ impl Eval for ast::Array<'_> { for item in items { match item { ast::ArrayItem::Pos(expr) => vec.push(expr.eval(vm)?), - ast::ArrayItem::Spread(expr) => match expr.eval(vm)? { + ast::ArrayItem::Spread(spread) => match spread.expr().eval(vm)? 
{ Value::None => {} Value::Array(array) => vec.extend(array.into_iter()), - v => bail!(expr.span(), "cannot spread {} into array", v.ty()), + v => bail!(spread.span(), "cannot spread {} into array", v.ty()), }, } } @@ -227,7 +227,6 @@ impl Eval for ast::Dict<'_> { fn eval(self, vm: &mut Vm) -> SourceResult { let mut map = indexmap::IndexMap::new(); - let mut invalid_keys = eco_vec![]; for item in self.items() { @@ -245,10 +244,10 @@ impl Eval for ast::Dict<'_> { }); map.insert(key, keyed.expr().eval(vm)?); } - ast::DictItem::Spread(expr) => match expr.eval(vm)? { + ast::DictItem::Spread(spread) => match spread.expr().eval(vm)? { Value::None => {} Value::Dict(dict) => map.extend(dict.into_iter()), - v => bail!(expr.span(), "cannot spread {} into dictionary", v.ty()), + v => bail!(spread.span(), "cannot spread {} into dictionary", v.ty()), }, } } diff --git a/tests/src/tests.rs b/tests/src/tests.rs index 7da2be868..09baa1901 100644 --- a/tests/src/tests.rs +++ b/tests/src/tests.rs @@ -884,7 +884,7 @@ fn print_annotation( let start_col = 1 + source.byte_to_column(range.start).unwrap(); let end_line = 1 + line + source.byte_to_line(range.end).unwrap(); let end_col = 1 + source.byte_to_column(range.end).unwrap(); - write!(output, "{start_line}:{start_col}-{end_line}:{end_col}: ").unwrap(); + write!(output, "{start_line}:{start_col}-{end_line}:{end_col} ").unwrap(); } writeln!(output, "{text}").unwrap(); } diff --git a/tests/typ/bugs/parenthesized.typ b/tests/typ/bugs/parenthesized.typ new file mode 100644 index 000000000..99488a676 --- /dev/null +++ b/tests/typ/bugs/parenthesized.typ @@ -0,0 +1,86 @@ +// Ref: false +// Test bugs related to destructuring and parenthesized parsing. 
+ +--- +// https://github.com/typst/typst/issues/1338 +#let foo = "foo" +#let bar = "bar" +// Error: 8-9 expected expression, found underscore +// Error: 16-17 expected expression, found underscore +#(foo: _, bar: _) + +--- +// https://github.com/typst/typst/issues/1342 +// Error: 5-8 expected named or keyed pair, found identifier +// Error: 10-13 expected named or keyed pair, found identifier +#(: foo, bar) + +--- +// https://github.com/typst/typst/issues/1351 +// Error: 17-22 expected pattern, found string +#let foo((test: "bar")) = {} + +--- +// https://github.com/typst/typst/issues/3014 +// Error: 8-17 expected expression, found named pair +#(box, fill: red) + +--- +// https://github.com/typst/typst/issues/3144 +#let f(a: 10) = a(1) + 1 +#test(f(a: _ => 5), 6) + +--- +// Error: 18-20 missing argument: pattern parameter +#let f(a: 10) = a() + 1 +#f(a: _ => 5) + +--- +// This wasn't allowed. +#let ((x)) = 1 +#test(x, 1) + +--- +// This also wasn't allowed. +#let ((a, b)) = (1, 2) +#test(a, 1) +#test(b, 2) + +--- +// This was unintentionally allowed ... +// Error: 9 expected equals sign +#let (a) + +--- +// ... where this wasn't. +// Error: 12 expected equals sign +#let (a, b) + +--- +// This wasn't allowed before the bug fix ... +#let f(..) = {} +#f(arg: 1) + +--- +// ... but this was. +#let f(..x) = {} +#f(arg: 1) + +--- +// Here, `best` was accessed as a variable, where it shouldn't have. +#{ + (best: _) = (best: "brr") +} + +--- +// Same here. +#{ + let array = (1, 2, 3, 4) + (test: array.at(1), best: _) = (test: "baz", best: "brr") + test(array, (1, "baz", 3, 4)) +} + +--- +// Here, `a` is not duplicate, where it was previously identified as one. 
+#let f((a: b), (c,), a) = (a, b, c) +#test(f((a: 1), (2,), 3), (3, 1, 2)) diff --git a/tests/typ/compiler/backtracking.typ b/tests/typ/compiler/backtracking.typ new file mode 100644 index 000000000..9c3ab8ec4 --- /dev/null +++ b/tests/typ/compiler/backtracking.typ @@ -0,0 +1,33 @@ +// Ensure that parser backtracking doesn't lead to exponential time consumption. +// If this regresses, the test suite will not terminate, which is a bit +// unfortunate compared to a good error, but at least we know something is up. +// +// Ref: false + +--- +#{ + let s = "(x: 1) => x" + let pat = "(x: {}) => 1 + x()" + for _ in range(50) { + s = pat.replace("{}", s) + } + test(eval(s)(), 51) +} + +--- +#{ + let s = "(x) = 1" + let pat = "(x: {_}) = 1" + for _ in range(100) { + s = pat.replace("_", s) + } + // Error: 8-9 cannot destructure integer + eval(s) +} + +--- +// Test whitespace after memoized part. +#( (x: () => 1 ) => 1 ) +// ------- +// This is memoized and we want to ensure that whitespace after this +// is handled correctly. diff --git a/tests/typ/compiler/block.typ b/tests/typ/compiler/block.typ index 0580acd2a..48c9fefcd 100644 --- a/tests/typ/compiler/block.typ +++ b/tests/typ/compiler/block.typ @@ -126,8 +126,7 @@ // Should output `3`. 
#{ - // Error: 6 expected identifier - // Error: 10 expected block + // Error: 7-10 expected pattern, found string for "v" // Error: 8 expected keyword `in` diff --git a/tests/typ/compiler/call.typ b/tests/typ/compiler/call.typ index e48eabfd7..0c225a1c8 100644 --- a/tests/typ/compiler/call.typ +++ b/tests/typ/compiler/call.typ @@ -75,8 +75,7 @@ #f[1](2) --- -// Error: 7 expected expression -// Error: 8 expected expression +// Error: 7-8 unexpected colon #func(:) // Error: 10-12 unexpected end of block comment diff --git a/tests/typ/compiler/closure.typ b/tests/typ/compiler/closure.typ index 85e9dbe25..ef4e7df04 100644 --- a/tests/typ/compiler/closure.typ +++ b/tests/typ/compiler/closure.typ @@ -171,25 +171,26 @@ #let f((k: a, b), c: 3, (d,)) = (a, b, c, d) #test(f((k: 1, b: 2), (4,)), (1, 2, 3, 4)) -// Error: 22-23 duplicate parameter: a -#let f((a: b), (c,), a) = none - -// Error: 8-14 expected identifier, found array +// Error: 8-14 expected identifier, found destructuring pattern #let f((a, b): 0) = none -// Error: 10-19 expected identifier, found destructuring pattern +// Error: 10-19 expected pattern, found array #let f(..(a, b: c)) = none -// Error: 10-16 expected identifier, found array +// Error: 10-16 expected pattern, found array #let f(..(a, b)) = none -// Error: 10-19 expected identifier, found destructuring pattern -#let f(..(a, b: c)) = none - --- // Error: 11-12 duplicate parameter: x #let f(x, x) = none +--- +// Error: 21 expected comma +// Error: 22-23 expected pattern, found integer +// Error: 24-25 unexpected plus +// Error: 26-27 expected pattern, found integer +#let f = (x: () => 1 2 + 3) => 4 + --- // Error: 14-15 duplicate parameter: a // Error: 23-24 duplicate parameter: b @@ -201,17 +202,18 @@ #let f(a, ..a) = none --- -// Error: 7-17 expected identifier, named pair or argument sink, found keyed pair +// Error: 7-14 expected pattern, found string #((a, "named": b) => none) --- -// Error: 10-15 expected identifier, found string +// Error: 
10-15 expected pattern, found string #let foo("key": b) = key --- -// Error: 10-14 expected identifier, found `none` +// Error: 10-14 expected pattern, found `none` +// Hint: 10-14 keyword `none` is not allowed as an identifier; try `none_` instead #let foo(none: b) = key --- -// Error: 11 expected comma +// Error: 10-11 expected identifier, found underscore #let foo(_: 3) = none diff --git a/tests/typ/compiler/dict.typ b/tests/typ/compiler/dict.typ index 8a5be5cd0..552b243c9 100644 --- a/tests/typ/compiler/dict.typ +++ b/tests/typ/compiler/dict.typ @@ -110,7 +110,6 @@ // Identified as dictionary due to initial colon. // The boolean key is allowed for now since it will only cause an error at the evaluation stage. // Error: 4-5 expected named or keyed pair, found integer -// Error: 5 expected comma // Error: 17 expected expression #(:1 b:"", true:) @@ -152,7 +151,7 @@ --- // Error: 7-10 expected identifier, found group -// Error: 12-14 expected identifier, found integer +// Error: 12-14 expected pattern, found integer #let ((a): 10) = "world" --- diff --git a/tests/typ/compiler/embedded-expr.typ b/tests/typ/compiler/embedded-expr.typ index c95184e4f..ee6e07f90 100644 --- a/tests/typ/compiler/embedded-expr.typ +++ b/tests/typ/compiler/embedded-expr.typ @@ -2,13 +2,13 @@ // Ref: false --- -// Error: 6-8 expected identifier, found keyword `as` +// Error: 6-8 expected pattern, found keyword `as` // Hint: 6-8 keyword `as` is not allowed as an identifier; try `as_` instead #let as = 1 + 2 --- #{ - // Error: 7-9 expected identifier, found keyword `as` + // Error: 7-9 expected pattern, found keyword `as` // Hint: 7-9 keyword `as` is not allowed as an identifier; try `as_` instead let as = 10 } diff --git a/tests/typ/compiler/for.typ b/tests/typ/compiler/for.typ index 64b5a1d48..392dd6764 100644 --- a/tests/typ/compiler/for.typ +++ b/tests/typ/compiler/for.typ @@ -92,19 +92,24 @@ --- // Destructuring without parentheses. 
-// Error: 7 expected keyword `in` -// Hint: 7 did you mean to use a destructuring pattern? +// Error: 7-8 unexpected comma +// Hint: 7-8 destructuring patterns must be wrapped in parentheses #for k, v in (a: 4, b: 5) { dont-care } -// Error: 5 expected identifier +// Error: 7-8 unexpected comma +// Hint: 7-8 destructuring patterns must be wrapped in parentheses +#for k, in () {} + +--- +// Error: 5 expected pattern #for -// Error: 5 expected identifier +// Error: 5 expected pattern #for// -// Error: 6 expected identifier +// Error: 6 expected pattern #{for} // Error: 7 expected keyword `in` @@ -116,15 +121,15 @@ // Error: 15 expected block #for v in iter -// Error: 5 expected identifier +// Error: 5 expected pattern #for v in iter {} -// Error: 6 expected identifier -// Error: 10 expected block +// Error: 7-10 expected pattern, found string +// Error: 16 expected block A#for "v" thing -// Error: 5 expected identifier +// Error: 6-9 expected pattern, found string #for "v" in iter {} // Error: 7 expected keyword `in` diff --git a/tests/typ/compiler/let.typ b/tests/typ/compiler/let.typ index 06f07394e..411509ff5 100644 --- a/tests/typ/compiler/let.typ +++ b/tests/typ/compiler/let.typ @@ -125,22 +125,22 @@ Three #test(a, 1) #test(b, 4) -// Error: 10-11 at most one binding per identifier is allowed +// Error: 10-11 duplicate binding: a #let (a, a) = (1, 2) -// Error: 12-15 at most one destructuring sink is allowed +// Error: 12-15 only one destructuring sink is allowed #let (..a, ..a) = (1, 2) -// Error: 12-13 at most one binding per identifier is allowed +// Error: 12-13 duplicate binding: a #let (a, ..a) = (1, 2) -// Error: 13-14 at most one binding per identifier is allowed +// Error: 13-14 duplicate binding: a #let (a: a, a) = (a: 1, b: 2) -// Error: 13-20 expected identifier, found function call +// Error: 13-20 expected pattern, found function call #let (a, b: b.at(0)) = (a: 1, b: 2) -// Error: 7-14 expected identifier or destructuring sink, found function call 
+// Error: 7-14 expected pattern, found function call #let (a.at(0),) = (1,) --- @@ -148,7 +148,7 @@ Three #let (a, b, c) = (1, 2) --- -// Error: 6-20 not enough elements to destructure +// Error: 7-10 not enough elements to destructure #let (..a, b, c, d) = (1, 2) --- @@ -193,6 +193,24 @@ Three #let (a, ..) = (a: 1, b: 2) #test(a, 1) +--- +// Ref: false +// Nested destructuring. +#let ((a, b), (key: c)) = ((1, 2), (key: 3)) +#test((a, b, c), (1, 2, 3)) + +--- +// Keyed destructuring is not currently supported. +// Error: 7-18 expected pattern, found string +#let ("spacy key": val) = ("spacy key": 123) +#val + +--- +// Keyed destructuring is not currently supported. +#let x = "spacy key" +// Error: 7-10 expected identifier, found group +#let ((x): v) = ("spacy key": 123) + --- // Trailing placeholders. // Error: 10-11 not enough elements to destructure @@ -200,8 +218,8 @@ Three #test(a, 1) --- -// Error: 10-13 expected identifier, found string -// Error: 18-19 expected identifier, found integer +// Error: 10-13 expected pattern, found string +// Error: 18-19 expected pattern, found integer #let (a: "a", b: 2) = (a: 1, b: 2) --- @@ -213,18 +231,17 @@ Three #let (a, b: b) = (a: 1) --- -// Error: 7-11 cannot destructure named elements from an array +// Error: 7-11 cannot destructure named pattern from an array #let (a: a, b) = (1, 2, 3) --- -// Error: 5 expected identifier +// Error: 5 expected pattern #let -// Error: 6 expected identifier +// Error: 6 expected pattern #{let} -// Error: 5 expected identifier -// Error: 5 expected semicolon or line break +// Error: 6-9 expected pattern, found string #let "v" // Error: 7 expected semicolon or line break @@ -233,8 +250,7 @@ Three // Error: 9 expected expression #let v = -// Error: 5 expected identifier -// Error: 5 expected semicolon or line break +// Error: 6-9 expected pattern, found string #let "v" = 1 // Terminated because expression ends. 
@@ -246,7 +262,7 @@ Three // Error: 11-12 unclosed delimiter #let v5 = (1, 2 + ; Five -// Error: 9-13 expected identifier, found boolean +// Error: 9-13 expected pattern, found boolean #let (..true) = false --- @@ -257,7 +273,7 @@ Three // Error: 2-3 unexpected underscore #_ -// Error: 8-9 unexpected underscore +// Error: 8-9 expected expression, found underscore #lorem(_) // Error: 3-4 expected expression, found underscore @@ -275,9 +291,11 @@ Three // Error: 15 expected expression #let func(x) = + --- // Error: 12 expected equals sign #let (func)(x) + --- // Error: 12 expected equals sign // Error: 15-15 expected semicolon or line break diff --git a/tests/typ/compiler/ops.typ b/tests/typ/compiler/ops.typ index 4d3b071f3..e148dd194 100644 --- a/tests/typ/compiler/ops.typ +++ b/tests/typ/compiler/ops.typ @@ -273,6 +273,38 @@ #test(a, ((2, 3, 4), 2)) #test(b, 1) +--- +// Test comma placement in destructuring assignment. +#let array = (1, 2, 3) +#((key: array.at(1)) = (key: "hi")) +#test(array, (1, "hi", 3)) + +#let array = (1, 2, 3) +#((array.at(1)) = ("hi")) +#test(array, (1, "hi", 3)) + +#let array = (1, 2, 3) +#((array.at(1),) = ("hi",)) +#test(array, (1, "hi", 3)) + +#let array = (1, 2, 3) +#((array.at(1)) = ("hi",)) +#test(array, (1, ("hi",), 3)) + +--- +// Test nested destructuring assignment. 
+#let a +#let b +#let c +#(((a, b), (key: c)) = ((1, 2), (key: 3))) +#test((a, b, c), (1, 2, 3)) + +--- +#let array = (1, 2, 3) +// Error: 3-17 cannot destructure string +#((array.at(1),) = ("hi")) +#test(array, (1, ("hi",), 3)) + --- // Error: 3-6 cannot mutate a constant: box #(box = 1) diff --git a/tests/typ/compiler/spread.typ b/tests/typ/compiler/spread.typ index f4864faf6..23cd587bc 100644 --- a/tests/typ/compiler/spread.typ +++ b/tests/typ/compiler/spread.typ @@ -61,11 +61,11 @@ #test(f(1, 2, 3), 3) --- -// Error: 13-19 cannot spread string +// Error: 11-19 cannot spread string #calc.min(.."nope") --- -// Error: 10-14 expected identifier, found boolean +// Error: 10-14 expected pattern, found boolean #let f(..true) = none --- @@ -90,11 +90,11 @@ } --- -// Error: 11-17 cannot spread dictionary into array +// Error: 9-17 cannot spread dictionary into array #(1, 2, ..(a: 1)) --- -// Error: 5-11 cannot spread array into dictionary +// Error: 3-11 cannot spread array into dictionary #(..(1, 2), a: 1) --- diff --git a/tests/typ/compute/foundations.typ b/tests/typ/compute/foundations.typ index cabed0bf3..e4b7ce6ab 100644 --- a/tests/typ/compute/foundations.typ +++ b/tests/typ/compute/foundations.typ @@ -86,7 +86,7 @@ #eval("RR_1^NN", mode: "math", scope: (RR: math.NN, NN: math.RR)) --- -// Error: 7-12 expected identifier +// Error: 7-12 expected pattern #eval("let") ---