From 4875633acf4701705b9b3b014eb7d94268b897c2 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sat, 23 Oct 2021 19:03:27 +0200 Subject: [PATCH 01/18] Change parser --- Cargo.toml | 3 +- src/eval/capture.rs | 106 +++- src/eval/mod.rs | 160 +++--- src/eval/walk.rs | 18 +- src/lib.rs | 8 +- src/parse/mod.rs | 1102 ++++++++++++++++++++----------------- src/parse/parser.rs | 417 ++++++++++---- src/parse/resolve.rs | 40 +- src/parse/tokens.rs | 519 +++++++++-------- src/source.rs | 24 +- src/syntax/expr.rs | 822 ++++++++++++++++++--------- src/syntax/ident.rs | 12 +- src/syntax/markup.rs | 176 ++++-- src/syntax/mod.rs | 688 ++++++++++++++++++++++- src/syntax/pretty.rs | 143 ++--- src/syntax/span.rs | 15 + src/syntax/token.rs | 271 +-------- src/syntax/visit.rs | 263 --------- tests/typ/code/array.typ | 2 +- tests/typ/code/call.typ | 2 +- tests/typ/code/dict.typ | 2 +- tests/typ/code/import.typ | 3 +- tests/typ/code/spread.typ | 6 +- 23 files changed, 2932 insertions(+), 1870 deletions(-) delete mode 100644 src/syntax/visit.rs diff --git a/Cargo.toml b/Cargo.toml index c7fa703c4..6a5b72b99 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,10 +5,11 @@ authors = ["The Typst Project Developers"] edition = "2018" [features] -default = ["cli", "fs", "layout-cache"] +default = ["cli", "fs", "layout-cache", "parse-cache"] cli = ["anyhow", "codespan-reporting", "fs", "pico-args", "same-file"] fs = ["dirs", "memmap2", "same-file", "walkdir"] layout-cache = ["rand"] +parse-cache = [] [profile.dev] # Faster compilation diff --git a/src/eval/capture.rs b/src/eval/capture.rs index f0a2b7292..baf597472 100644 --- a/src/eval/capture.rs +++ b/src/eval/capture.rs @@ -1,8 +1,7 @@ use std::rc::Rc; use super::{Scope, Scopes, Value}; -use crate::syntax::visit::{immutable::visit_expr, Visit}; -use crate::syntax::{Expr, Ident}; +use crate::syntax::{ClosureParam, Expr, Imports, RedTicket}; /// A visitor that captures variable slots. 
pub struct CapturesVisitor<'a> { @@ -21,36 +20,83 @@ impl<'a> CapturesVisitor<'a> { } } + pub fn visit(&mut self, node: RedTicket) { + let expr: Option = node.cast(); + + match expr.as_ref() { + Some(Expr::Let(expr)) => { + self.visit(expr.init_ticket()); + let ident = expr.binding(); + self.internal.def_mut(ident.as_str(), Value::None); + } + Some(Expr::Closure(closure)) => { + for arg in closure.params() { + match arg { + ClosureParam::Pos(ident) | ClosureParam::Sink(ident) => { + self.internal.def_mut(ident.as_str(), Value::None); + } + ClosureParam::Named(name) => { + self.internal.def_mut(name.name().as_str(), Value::None); + } + } + } + self.visit(closure.body_ticket()); + } + Some(Expr::For(forloop)) => { + let pattern = forloop.pattern(); + self.internal.def_mut(pattern.value().as_str(), Value::None); + + if let Some(key) = pattern.key() { + self.internal.def_mut(key.as_str(), Value::None); + } + self.visit(forloop.body_ticket()); + } + Some(Expr::Import(import)) => { + if let Imports::Idents(idents) = import.imports() { + for ident in idents { + self.internal.def_mut(ident.as_str(), Value::None); + } + } + } + Some(Expr::Ident(ident)) => { + if self.internal.get(ident.as_str()).is_none() { + if let Some(slot) = self.external.get(ident.as_str()) { + self.captures.def_slot(ident.as_str(), Rc::clone(slot)); + } + } + } + _ => {} + } + + match expr.as_ref() { + Some(Expr::Let(_)) | Some(Expr::For(_)) | Some(Expr::Closure(_)) => {} + + Some(Expr::Block(_)) => { + self.internal.enter(); + for child in node.own().children() { + self.visit(child); + } + self.internal.exit(); + } + + Some(Expr::Template(_)) => { + self.internal.enter(); + for child in node.own().children() { + self.visit(child); + } + self.internal.exit(); + } + + _ => { + for child in node.own().children() { + self.visit(child); + } + } + } + } + /// Return the scope of captured variables. 
pub fn finish(self) -> Scope { self.captures } } - -impl<'ast> Visit<'ast> for CapturesVisitor<'_> { - fn visit_expr(&mut self, node: &'ast Expr) { - if let Expr::Ident(ident) = node { - // Find out whether the name is not locally defined and if so if it - // can be captured. - if self.internal.get(ident).is_none() { - if let Some(slot) = self.external.get(ident) { - self.captures.def_slot(ident.as_str(), Rc::clone(slot)); - } - } - } else { - visit_expr(self, node); - } - } - - fn visit_binding(&mut self, ident: &'ast Ident) { - self.internal.def_mut(ident.as_str(), Value::None); - } - - fn visit_enter(&mut self) { - self.internal.enter(); - } - - fn visit_exit(&mut self) { - self.internal.exit(); - } -} diff --git a/src/eval/mod.rs b/src/eval/mod.rs index 691e3c494..296e33808 100644 --- a/src/eval/mod.rs +++ b/src/eval/mod.rs @@ -36,9 +36,7 @@ use crate::diag::{At, Error, StrResult, Trace, Tracepoint, TypResult}; use crate::geom::{Angle, Fractional, Length, Relative}; use crate::image::ImageStore; use crate::loading::Loader; -use crate::parse::parse; use crate::source::{SourceId, SourceStore}; -use crate::syntax::visit::Visit; use crate::syntax::*; use crate::util::RefMutExt; use crate::Context; @@ -114,7 +112,7 @@ impl<'a> EvalContext<'a> { // Parse the file. let source = self.sources.get(id); - let ast = parse(&source)?; + let ast = source.ast()?; // Prepare the new context. let new_scopes = Scopes::new(self.scopes.base); @@ -122,7 +120,7 @@ impl<'a> EvalContext<'a> { self.route.push(id); // Evaluate the module. - let template = Rc::new(ast).eval(self).trace(|| Tracepoint::Import, span)?; + let template = ast.eval(self).trace(|| Tracepoint::Import, span)?; // Restore the old context. 
let new_scopes = mem::replace(&mut self.scopes, old_scopes); @@ -232,7 +230,7 @@ impl Eval for ArrayExpr { type Output = Array; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - self.items.iter().map(|expr| expr.eval(ctx)).collect() + self.items().iter().map(|expr| expr.eval(ctx)).collect() } } @@ -240,9 +238,9 @@ impl Eval for DictExpr { type Output = Dict; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - self.items + self.items() .iter() - .map(|Named { name, expr }| Ok(((&name.string).into(), expr.eval(ctx)?))) + .map(|x| Ok(((&x.name().string).into(), x.expr().eval(ctx)?))) .collect() } } @@ -251,7 +249,7 @@ impl Eval for TemplateExpr { type Output = Template; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - self.body.eval(ctx) + self.body().eval(ctx) } } @@ -259,7 +257,7 @@ impl Eval for GroupExpr { type Output = Value; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - self.expr.eval(ctx) + self.expr().eval(ctx) } } @@ -270,7 +268,7 @@ impl Eval for BlockExpr { ctx.scopes.enter(); let mut output = Value::None; - for expr in &self.exprs { + for expr in &self.exprs() { let value = expr.eval(ctx)?; output = ops::join(output, value).at(expr.span())?; } @@ -285,13 +283,13 @@ impl Eval for UnaryExpr { type Output = Value; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - let value = self.expr.eval(ctx)?; - let result = match self.op { + let value = self.expr().eval(ctx)?; + let result = match self.op() { UnOp::Pos => ops::pos(value), UnOp::Neg => ops::neg(value), UnOp::Not => ops::not(value), }; - result.at(self.span) + result.at(self.span()) } } @@ -299,7 +297,7 @@ impl Eval for BinaryExpr { type Output = Value; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - match self.op { + match self.op() { BinOp::Add => self.apply(ctx, ops::add), BinOp::Sub => self.apply(ctx, ops::sub), BinOp::Mul => self.apply(ctx, ops::mul), @@ -327,17 +325,17 @@ impl BinaryExpr { where F: FnOnce(Value, Value) -> StrResult, { - let lhs = self.lhs.eval(ctx)?; 
+ let lhs = self.lhs().eval(ctx)?; // Short-circuit boolean operations. - if (self.op == BinOp::And && lhs == Value::Bool(false)) - || (self.op == BinOp::Or && lhs == Value::Bool(true)) + if (self.op() == BinOp::And && lhs == Value::Bool(false)) + || (self.op() == BinOp::Or && lhs == Value::Bool(true)) { return Ok(lhs); } - let rhs = self.rhs.eval(ctx)?; - op(lhs, rhs).at(self.span) + let rhs = self.rhs().eval(ctx)?; + op(lhs, rhs).at(self.span()) } /// Apply an assignment operation. @@ -345,10 +343,10 @@ impl BinaryExpr { where F: FnOnce(Value, Value) -> StrResult, { - let rhs = self.rhs.eval(ctx)?; - let mut target = self.lhs.access(ctx)?; + let rhs = self.rhs().eval(ctx)?; + let mut target = self.lhs().access(ctx)?; let lhs = mem::take(&mut *target); - *target = op(lhs, rhs).at(self.span)?; + *target = op(lhs, rhs).at(self.span())?; Ok(Value::None) } } @@ -357,27 +355,27 @@ impl Eval for CallExpr { type Output = Value; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - let callee = self.callee.eval(ctx)?; - let mut args = self.args.eval(ctx)?; + let callee = self.callee().eval(ctx)?; + let mut args = self.args().eval(ctx)?; match callee { Value::Array(array) => { - array.get(args.into_index()?).map(Value::clone).at(self.span) + array.get(args.into_index()?).map(Value::clone).at(self.span()) } Value::Dict(dict) => { - dict.get(args.into_key()?).map(Value::clone).at(self.span) + dict.get(args.into_key()?).map(Value::clone).at(self.span()) } Value::Func(func) => { let point = || Tracepoint::Call(func.name().map(ToString::to_string)); - let value = func.call(ctx, &mut args).trace(point, self.span)?; + let value = func.call(ctx, &mut args).trace(point, self.span())?; args.finish()?; Ok(value) } v => bail!( - self.callee.span(), + self.callee().span(), "expected function or collection, found {}", v.type_name(), ), @@ -389,9 +387,9 @@ impl Eval for CallArgs { type Output = Args; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - let mut items = 
Vec::with_capacity(self.items.len()); + let mut items = Vec::with_capacity(self.items().len()); - for arg in &self.items { + for arg in &self.items() { let span = arg.span(); match arg { CallArg::Pos(expr) => { @@ -401,11 +399,11 @@ impl Eval for CallArgs { value: Spanned::new(expr.eval(ctx)?, expr.span()), }); } - CallArg::Named(Named { name, expr }) => { + CallArg::Named(x) => { items.push(Arg { span, - name: Some((&name.string).into()), - value: Spanned::new(expr.eval(ctx)?, expr.span()), + name: Some((&x.name().string).into()), + value: Spanned::new(x.expr().eval(ctx)?, x.expr().span()), }); } CallArg::Spread(expr) => match expr.eval(ctx)? { @@ -438,7 +436,7 @@ impl Eval for CallArgs { } } - Ok(Args { span: self.span, items }) + Ok(Args { span: self.span(), items }) } } @@ -446,26 +444,27 @@ impl Eval for ClosureExpr { type Output = Value; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - let name = self.name.as_ref().map(|name| name.string.clone()); + let name = self.name().as_ref().map(|name| name.string.clone()); // Collect captured variables. let captured = { let mut visitor = CapturesVisitor::new(&ctx.scopes); - visitor.visit_closure(self); + visitor.visit(self.underlying()); visitor.finish() }; let mut sink = None; - let mut params = Vec::with_capacity(self.params.len()); + let params_src = self.params(); + let mut params = Vec::with_capacity(params_src.len()); // Collect parameters and an optional sink parameter. - for param in &self.params { + for param in ¶ms_src { match param { ClosureParam::Pos(name) => { params.push((name.string.clone(), None)); } - ClosureParam::Named(Named { name, expr }) => { - params.push((name.string.clone(), Some(expr.eval(ctx)?))); + ClosureParam::Named(x) => { + params.push((x.name().string.clone(), Some(x.expr().eval(ctx)?))); } ClosureParam::Sink(name) => { if sink.is_some() { @@ -478,7 +477,7 @@ impl Eval for ClosureExpr { // Clone the body expression so that we don't have a lifetime // dependence on the AST. 
- let body = Rc::clone(&self.body); + let body = Rc::new(self.body()); // Define the actual function. let func = Function::new(name, move |ctx, args| { @@ -515,8 +514,9 @@ impl Eval for WithExpr { type Output = Value; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - let wrapped = self.callee.eval(ctx)?.cast::().at(self.callee.span())?; - let applied = self.args.eval(ctx)?; + let wrapped = + self.callee().eval(ctx)?.cast::().at(self.callee().span())?; + let applied = self.args().eval(ctx)?; let name = wrapped.name().cloned(); let func = Function::new(name, move |ctx, args| { @@ -532,11 +532,11 @@ impl Eval for LetExpr { type Output = Value; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - let value = match &self.init { + let value = match &self.init() { Some(expr) => expr.eval(ctx)?, None => Value::None, }; - ctx.scopes.def_mut(self.binding.as_str(), value); + ctx.scopes.def_mut(self.binding().as_str(), value); Ok(Value::None) } } @@ -545,12 +545,15 @@ impl Eval for IfExpr { type Output = Value; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - let condition = - self.condition.eval(ctx)?.cast::().at(self.condition.span())?; + let condition = self + .condition() + .eval(ctx)? + .cast::() + .at(self.condition().span())?; if condition { - self.if_body.eval(ctx) - } else if let Some(else_body) = &self.else_body { + self.if_body().eval(ctx) + } else if let Some(else_body) = &self.else_body() { else_body.eval(ctx) } else { Ok(Value::None) @@ -564,9 +567,14 @@ impl Eval for WhileExpr { fn eval(&self, ctx: &mut EvalContext) -> TypResult { let mut output = Value::None; - while self.condition.eval(ctx)?.cast::().at(self.condition.span())? { - let value = self.body.eval(ctx)?; - output = ops::join(output, value).at(self.body.span())?; + while self + .condition() + .eval(ctx)? + .cast::() + .at(self.condition().span())? 
+ { + let value = self.body().eval(ctx)?; + output = ops::join(output, value).at(self.body().span())?; } Ok(output) @@ -586,9 +594,9 @@ impl Eval for ForExpr { for ($($value),*) in $iter { $(ctx.scopes.def_mut($binding.as_str(), $value);)* - let value = self.body.eval(ctx)?; + let value = self.body().eval(ctx)?; output = ops::join(output, value) - .at(self.body.span())?; + .at(self.body().span())?; } ctx.scopes.exit(); @@ -596,28 +604,27 @@ impl Eval for ForExpr { }}; } - let iter = self.iter.eval(ctx)?; - match (&self.pattern, iter) { - (ForPattern::Value(v), Value::Str(string)) => { - iter!(for (v => value) in string.iter()) - } - (ForPattern::Value(v), Value::Array(array)) => { + let iter = self.iter().eval(ctx)?; + let pattern = self.pattern(); + match (pattern.key(), pattern.value(), iter) { + (None, v, Value::Str(string)) => iter!(for (v => value) in string.iter()), + (None, v, Value::Array(array)) => { iter!(for (v => value) in array.into_iter()) } - (ForPattern::KeyValue(i, v), Value::Array(array)) => { + (Some(i), v, Value::Array(array)) => { iter!(for (i => idx, v => value) in array.into_iter().enumerate()) } - (ForPattern::Value(v), Value::Dict(dict)) => { + (None, v, Value::Dict(dict)) => { iter!(for (v => value) in dict.into_iter().map(|p| p.1)) } - (ForPattern::KeyValue(k, v), Value::Dict(dict)) => { + (Some(k), v, Value::Dict(dict)) => { iter!(for (k => key, v => value) in dict.into_iter()) } - (ForPattern::KeyValue(_, _), Value::Str(_)) => { - bail!(self.pattern.span(), "mismatched pattern"); + (_, _, Value::Str(_)) => { + bail!(pattern.span(), "mismatched pattern"); } - (_, iter) => { - bail!(self.iter.span(), "cannot loop over {}", iter.type_name()); + (_, _, iter) => { + bail!(self.iter().span(), "cannot loop over {}", iter.type_name()); } } } @@ -627,12 +634,12 @@ impl Eval for ImportExpr { type Output = Value; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - let path = self.path.eval(ctx)?.cast::().at(self.path.span())?; + let path = 
self.path().eval(ctx)?.cast::().at(self.path().span())?; - let file = ctx.import(&path, self.path.span())?; + let file = ctx.import(&path, self.path().span())?; let module = &ctx.modules[&file]; - match &self.imports { + match &self.imports() { Imports::Wildcard => { for (var, slot) in module.scope.iter() { ctx.scopes.def_mut(var, slot.borrow().clone()); @@ -657,9 +664,10 @@ impl Eval for IncludeExpr { type Output = Value; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - let path = self.path.eval(ctx)?.cast::().at(self.path.span())?; + let path_node = self.path(); + let path = path_node.eval(ctx)?.cast::().at(path_node.span())?; - let file = ctx.import(&path, self.path.span())?; + let file = ctx.import(&path, path_node.span())?; let module = &ctx.modules[&file]; Ok(Value::Template(module.template.clone())) @@ -698,14 +706,14 @@ impl Access for Ident { impl Access for CallExpr { fn access<'a>(&self, ctx: &'a mut EvalContext) -> TypResult> { - let args = self.args.eval(ctx)?; - let guard = self.callee.access(ctx)?; + let args = self.args().eval(ctx)?; + let guard = self.callee().access(ctx)?; RefMut::try_map(guard, |value| match value { - Value::Array(array) => array.get_mut(args.into_index()?).at(self.span), + Value::Array(array) => array.get_mut(args.into_index()?).at(self.span()), Value::Dict(dict) => Ok(dict.get_mut(args.into_key()?)), v => bail!( - self.callee.span(), + self.callee().span(), "expected collection, found {}", v.type_name(), ), diff --git a/src/eval/walk.rs b/src/eval/walk.rs index 961383381..e4d7f61a1 100644 --- a/src/eval/walk.rs +++ b/src/eval/walk.rs @@ -27,10 +27,10 @@ impl Walk for MarkupNode { fn walk(&self, ctx: &mut EvalContext) -> TypResult<()> { match self { Self::Space => ctx.template.space(), - Self::Linebreak(_) => ctx.template.linebreak(), - Self::Parbreak(_) => ctx.template.parbreak(), - Self::Strong(_) => ctx.template.modify(|s| s.text_mut().strong.flip()), - Self::Emph(_) => ctx.template.modify(|s| s.text_mut().emph.flip()), 
+ Self::Linebreak => ctx.template.linebreak(), + Self::Parbreak => ctx.template.parbreak(), + Self::Strong => ctx.template.modify(|s| s.text_mut().strong.flip()), + Self::Emph => ctx.template.modify(|s| s.text_mut().emph.flip()), Self::Text(text) => ctx.template.text(text), Self::Raw(raw) => raw.walk(ctx)?, Self::Heading(heading) => heading.walk(ctx)?, @@ -69,8 +69,8 @@ impl Walk for RawNode { impl Walk for HeadingNode { fn walk(&self, ctx: &mut EvalContext) -> TypResult<()> { - let level = self.level; - let body = self.body.eval(ctx)?; + let level = self.level().0; + let body = self.body().eval(ctx)?; ctx.template.parbreak(); ctx.template.save(); @@ -90,7 +90,7 @@ impl Walk for HeadingNode { impl Walk for ListNode { fn walk(&self, ctx: &mut EvalContext) -> TypResult<()> { - let body = self.body.eval(ctx)?; + let body = self.body().eval(ctx)?; walk_item(ctx, Str::from('•'), body); Ok(()) } @@ -98,8 +98,8 @@ impl Walk for ListNode { impl Walk for EnumNode { fn walk(&self, ctx: &mut EvalContext) -> TypResult<()> { - let body = self.body.eval(ctx)?; - let label = format_str!("{}.", self.number.unwrap_or(1)); + let body = self.body().eval(ctx)?; + let label = format_str!("{}.", self.number().0.unwrap_or(1)); walk_item(ctx, label, body); Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index 41b2e88b9..468c06d8c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -58,7 +58,6 @@ use crate::layout::{EvictionPolicy, LayoutCache}; use crate::loading::Loader; use crate::source::{SourceId, SourceStore}; use crate::style::Style; -use crate::syntax::Markup; /// The core context which holds the loader, configuration and cached artifacts. pub struct Context { @@ -100,14 +99,9 @@ impl Context { &self.style } - /// Parse a source file and return the resulting markup. - pub fn parse(&mut self, id: SourceId) -> TypResult { - parse::parse(self.sources.get(id)) - } - /// Evaluate a source file and return the resulting module. 
pub fn evaluate(&mut self, id: SourceId) -> TypResult { - let ast = self.parse(id)?; + let ast = self.sources.get(id).ast()?; eval::eval(self, id, &ast) } diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 307874232..dc7691833 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -12,215 +12,213 @@ pub use tokens::*; use std::rc::Rc; -use crate::diag::TypResult; use crate::source::SourceFile; use crate::syntax::*; use crate::util::EcoString; /// Parse a source file. -pub fn parse(source: &SourceFile) -> TypResult { +pub fn parse(source: &SourceFile) -> Rc { let mut p = Parser::new(source); - let markup = markup(&mut p); - let errors = p.finish(); - if errors.is_empty() { - Ok(markup) - } else { - Err(Box::new(errors)) - } + markup(&mut p); + p.finish() } /// Parse markup. -fn markup(p: &mut Parser) -> Markup { +fn markup(p: &mut Parser) { markup_while(p, true, &mut |_| true) } -/// Parse markup that stays equal or right of the given column. -fn markup_indented(p: &mut Parser, column: usize) -> Markup { +/// Parse markup that stays right of the given column. +fn markup_indented(p: &mut Parser, column: usize) { + // TODO this is broken p.eat_while(|t| match t { - Token::Space(n) => n == 0, - Token::LineComment(_) | Token::BlockComment(_) => true, + NodeKind::Space(n) => n == 0, + NodeKind::LineComment | NodeKind::BlockComment => true, _ => false, }); markup_while(p, false, &mut |p| match p.peek() { - Some(Token::Space(n)) if n >= 1 => p.column(p.next_end()) >= column, + Some(NodeKind::Space(n)) if n >= 1 => p.column(p.next_end()) >= column, _ => true, }) } -/// Parse a syntax tree while the peeked token satisifies a condition. +/// Parse a syntax tree while the peeked NodeKind satisifies a condition. /// /// If `at_start` is true, things like headings that may only appear at the /// beginning of a line or template are allowed. 
-fn markup_while(p: &mut Parser, mut at_start: bool, f: &mut F) -> Markup +fn markup_while(p: &mut Parser, mut at_start: bool, f: &mut F) where F: FnMut(&mut Parser) -> bool, { - let mut tree = vec![]; + p.start(); while !p.eof() && f(p) { - if let Some(node) = markup_node(p, &mut at_start) { - at_start &= matches!(node, MarkupNode::Space | MarkupNode::Parbreak(_)); - tree.push(node); + markup_node(p, &mut at_start); + if let Some(node) = p.last_child() { + at_start &= matches!(node.kind(), &NodeKind::Space(_) | &NodeKind::Parbreak | &NodeKind::LineComment | &NodeKind::BlockComment); } } - tree + p.end(NodeKind::Markup); } /// Parse a markup node. -fn markup_node(p: &mut Parser, at_start: &mut bool) -> Option { - let token = p.peek()?; - let span = p.peek_span(); - let node = match token { - // Whitespace. - Token::Space(newlines) => { - *at_start |= newlines > 0; - if newlines < 2 { - MarkupNode::Space - } else { - MarkupNode::Parbreak(span) +fn markup_node(p: &mut Parser, at_start: &mut bool) { + if let Some(token) = p.peek() { + match token { + // Whitespace. + NodeKind::Space(newlines) => { + *at_start |= newlines > 0; + + if newlines < 2 { + p.eat(); + } else { + p.convert(NodeKind::Parbreak); + } } - } - // Text. - Token::Text(text) => MarkupNode::Text(text.into()), - Token::Tilde => MarkupNode::Text("\u{00A0}".into()), - Token::HyphHyph => MarkupNode::Text("\u{2013}".into()), - Token::HyphHyphHyph => MarkupNode::Text("\u{2014}".into()), - Token::UnicodeEscape(t) => MarkupNode::Text(unicode_escape(p, t)), + // Text. + NodeKind::UnicodeEscape(u) => { + if !u.terminated { + p.convert(NodeKind::Error( + ErrorPosition::End, + "expected closing brace".into(), + )); + p.unsuccessful(); + return; + } - // Markup. 
- Token::Backslash => MarkupNode::Linebreak(span), - Token::Star => MarkupNode::Strong(span), - Token::Underscore => MarkupNode::Emph(span), - Token::Raw(t) => raw(p, t), - Token::Eq if *at_start => return Some(heading(p)), - Token::Hyph if *at_start => return Some(list_node(p)), - Token::Numbering(number) if *at_start => return Some(enum_node(p, number)), + if u.character.is_none() { + let src = p.peek_src(); + p.convert(NodeKind::Error( + ErrorPosition::Full, + "invalid unicode escape sequence".into(), + )); + p.start(); + p.end(NodeKind::Text(src.into())); + return; + } - // Line-based markup that is not currently at the start of the line. - Token::Eq | Token::Hyph | Token::Numbering(_) => { - MarkupNode::Text(p.peek_src().into()) - } - - // Hashtag + keyword / identifier. - Token::Ident(_) - | Token::Let - | Token::If - | Token::While - | Token::For - | Token::Import - | Token::Include => { - let stmt = matches!(token, Token::Let | Token::Import); - let group = if stmt { Group::Stmt } else { Group::Expr }; - - p.start_group(group, TokenMode::Code); - let expr = expr_with(p, true, 0); - if stmt && expr.is_some() && !p.eof() { - p.expected_at(p.prev_end(), "semicolon or line break"); + p.eat(); } - p.end_group(); + NodeKind::Raw(r) => { + if !r.terminated { + p.convert(NodeKind::Error( + ErrorPosition::End, + "expected backtick(s)".into(), + )); + p.unsuccessful(); + return; + } - return expr.map(MarkupNode::Expr); - } + p.eat(); + } + NodeKind::Text(_) + | NodeKind::EnDash + | NodeKind::EmDash + | NodeKind::NonBreakingSpace => { + p.eat(); + } - // Block and template. - Token::LeftBrace => return Some(MarkupNode::Expr(block(p))), - Token::LeftBracket => return Some(MarkupNode::Expr(template(p))), + // Markup. + NodeKind::Emph | NodeKind::Strong | NodeKind::Linebreak => { + p.eat(); + } - // Comments. 
- Token::LineComment(_) | Token::BlockComment(_) => { - p.eat(); - return None; - } + NodeKind::Eq if *at_start => heading(p), + NodeKind::ListBullet if *at_start => list_node(p), + NodeKind::EnumNumbering(_) if *at_start => enum_node(p), - _ => { - *at_start = false; - p.unexpected(); - return None; - } - }; - p.eat(); - Some(node) -} + // Line-based markup that is not currently at the start of the line. + NodeKind::Eq | NodeKind::ListBullet | NodeKind::EnumNumbering(_) => { + p.convert(NodeKind::Text(p.peek_src().into())) + } -/// Handle a unicode escape sequence. -fn unicode_escape(p: &mut Parser, token: UnicodeEscapeToken) -> EcoString { - let span = p.peek_span(); - let text = if let Some(c) = resolve::resolve_hex(token.sequence) { - c.into() - } else { - // Print out the escape sequence verbatim if it is invalid. - p.error(span, "invalid unicode escape sequence"); - p.peek_src().into() - }; + // Hashtag + keyword / identifier. + NodeKind::Ident(_) + | NodeKind::Let + | NodeKind::If + | NodeKind::While + | NodeKind::For + | NodeKind::Import + | NodeKind::Include => { + let stmt = matches!(token, NodeKind::Let | NodeKind::Import); + let group = if stmt { Group::Stmt } else { Group::Expr }; - if !token.terminated { - p.error(span.end, "expected closing brace"); + p.start_group(group, TokenMode::Code); + expr_with(p, true, 0); + if stmt && p.success() && !p.eof() { + p.expected_at("semicolon or line break"); + } + p.end_group(); + } + + // Block and template. + NodeKind::LeftBrace => { + block(p); + } + NodeKind::LeftBracket => { + template(p); + } + + // Comments. + NodeKind::LineComment | NodeKind::BlockComment => { + p.eat(); + } + + _ => { + *at_start = false; + p.unexpected(); + } + }; } - - text -} - -/// Handle a raw block. 
-fn raw(p: &mut Parser, token: RawToken) -> MarkupNode { - let column = p.column(p.next_start()); - let span = p.peek_span(); - let raw = resolve::resolve_raw(span, column, token.backticks, token.text); - if !token.terminated { - p.error(span.end, "expected backtick(s)"); - } - MarkupNode::Raw(Box::new(raw)) } /// Parse a heading. -fn heading(p: &mut Parser) -> MarkupNode { - let start = p.next_start(); - p.eat_assert(Token::Eq); +fn heading(p: &mut Parser) { + p.start(); + p.start(); + p.eat_assert(NodeKind::Eq); // Count depth. let mut level: usize = 1; - while p.eat_if(Token::Eq) { + while p.eat_if(NodeKind::Eq) { level += 1; } if level > 6 { - return MarkupNode::Text(p.get(start .. p.prev_end()).into()); + p.lift(); + p.end(NodeKind::Text(EcoString::from('=').repeat(level))); + } else { + p.end(NodeKind::HeadingLevel(level as u8)); + let column = p.column(p.prev_end()); + markup_indented(p, column); + p.end(NodeKind::Heading); } - - let column = p.column(p.prev_end()); - let body = markup_indented(p, column); - MarkupNode::Heading(Box::new(HeadingNode { - span: p.span_from(start), - level, - body, - })) } /// Parse a single list item. -fn list_node(p: &mut Parser) -> MarkupNode { - let start = p.next_start(); - p.eat_assert(Token::Hyph); +fn list_node(p: &mut Parser) { + p.start(); + p.eat_assert(NodeKind::ListBullet); let column = p.column(p.prev_end()); - let body = markup_indented(p, column); - MarkupNode::List(Box::new(ListNode { span: p.span_from(start), body })) + markup_indented(p, column); + p.end(NodeKind::List); } /// Parse a single enum item. 
-fn enum_node(p: &mut Parser, number: Option) -> MarkupNode { - let start = p.next_start(); - p.eat_assert(Token::Numbering(number)); +fn enum_node(p: &mut Parser) { + p.start(); + if !matches!(p.eat(), Some(NodeKind::EnumNumbering(_))) { + panic!("enum item does not start with numbering") + }; let column = p.column(p.prev_end()); - let body = markup_indented(p, column); - MarkupNode::Enum(Box::new(EnumNode { - span: p.span_from(start), - number, - body, - })) + markup_indented(p, column); + p.end(NodeKind::Enum); } /// Parse an expression. -fn expr(p: &mut Parser) -> Option { +fn expr(p: &mut Parser) { expr_with(p, false, 0) } @@ -231,134 +229,167 @@ fn expr(p: &mut Parser) -> Option { /// in markup. /// /// Stops parsing at operations with lower precedence than `min_prec`, -fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> Option { - let start = p.next_start(); - let mut lhs = match p.eat_map(UnOp::from_token) { +fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) { + p.start(); + let mut offset = p.child_count(); + // Start the unary expression. + match p.eat_map(|x| UnOp::from_token(&x)) { Some(op) => { let prec = op.precedence(); - let expr = expr_with(p, atomic, prec)?; - Expr::Unary(Box::new(UnaryExpr { span: p.span_from(start), op, expr })) + expr_with(p, atomic, prec); + + if p.may_lift_abort() { + return; + } + + p.end_and_start_with(NodeKind::Unary); + } + None => { + primary(p, atomic); + if p.may_lift_abort() { + return; + } } - None => primary(p, atomic)?, }; loop { // Exclamation mark, parenthesis or bracket means this is a function // call. 
- if matches!(p.peek_direct(), Some(Token::LeftParen | Token::LeftBracket)) { - lhs = call(p, lhs)?; + if matches!( + p.peek_direct(), + Some(NodeKind::LeftParen | NodeKind::LeftBracket) + ) { + call(p, p.child_count() - offset); continue; } - if p.eat_if(Token::With) { - lhs = with_expr(p, lhs)?; + if p.peek() == Some(NodeKind::With) { + with_expr(p, p.child_count() - offset); + + if p.may_lift_abort() { + return; + } } if atomic { + p.lift(); break; } - let op = match p.peek().and_then(BinOp::from_token) { + let op = match p.peek().as_ref().and_then(BinOp::from_token) { Some(binop) => binop, - None => break, + None => { + p.lift(); + break; + } }; let mut prec = op.precedence(); if prec < min_prec { - break; + { + p.lift(); + break; + }; } p.eat(); + match op.associativity() { Associativity::Left => prec += 1, Associativity::Right => {} } - let rhs = match expr_with(p, atomic, prec) { - Some(rhs) => rhs, - None => break, - }; + expr_with(p, atomic, prec); - let span = lhs.span().join(rhs.span()); - lhs = Expr::Binary(Box::new(BinaryExpr { span, lhs, op, rhs })); + if !p.success() { + p.lift(); + break; + } + + offset = p.end_and_start_with(NodeKind::Binary).0; } - - Some(lhs) } /// Parse a primary expression. -fn primary(p: &mut Parser, atomic: bool) -> Option { - if let Some(expr) = literal(p) { - return Some(expr); +fn primary(p: &mut Parser, atomic: bool) { + if literal(p) { + return; } match p.peek() { // Things that start with an identifier. - Some(Token::Ident(string)) => { - let ident = Ident { - span: p.eat_span(), - string: string.into(), - }; + Some(NodeKind::Ident(_)) => { + // Start closure params. + p.start(); + p.eat(); // Arrow means this is a closure's lone parameter. 
- Some(if !atomic && p.eat_if(Token::Arrow) { - let body = expr(p)?; - Expr::Closure(Box::new(ClosureExpr { - span: ident.span.join(body.span()), - name: None, - params: vec![ClosureParam::Pos(ident)], - body: Rc::new(body), - })) + if !atomic && p.peek() == Some(NodeKind::Arrow) { + p.end_and_start_with(NodeKind::ClosureParams); + p.eat(); + + expr(p); + + p.end_or_abort(NodeKind::Closure); } else { - Expr::Ident(Box::new(ident)) - }) + p.lift(); + } } // Structures. - Some(Token::LeftParen) => parenthesized(p), - Some(Token::LeftBracket) => Some(template(p)), - Some(Token::LeftBrace) => Some(block(p)), + Some(NodeKind::LeftParen) => parenthesized(p), + Some(NodeKind::LeftBracket) => template(p), + Some(NodeKind::LeftBrace) => block(p), // Keywords. - Some(Token::Let) => let_expr(p), - Some(Token::If) => if_expr(p), - Some(Token::While) => while_expr(p), - Some(Token::For) => for_expr(p), - Some(Token::Import) => import_expr(p), - Some(Token::Include) => include_expr(p), + Some(NodeKind::Let) => let_expr(p), + Some(NodeKind::If) => if_expr(p), + Some(NodeKind::While) => while_expr(p), + Some(NodeKind::For) => for_expr(p), + Some(NodeKind::Import) => import_expr(p), + Some(NodeKind::Include) => include_expr(p), // Nothing. _ => { p.expected("expression"); - None + p.unsuccessful(); } } } /// Parse a literal. -fn literal(p: &mut Parser) -> Option { - let span = p.peek_span(); - let lit = match p.peek()? { - // Basic values. 
- Token::None => Lit::None(span), - Token::Auto => Lit::Auto(span), - Token::Bool(b) => Lit::Bool(span, b), - Token::Int(i) => Lit::Int(span, i), - Token::Float(f) => Lit::Float(span, f), - Token::Length(val, unit) => Lit::Length(span, val, unit), - Token::Angle(val, unit) => Lit::Angle(span, val, unit), - Token::Percent(p) => Lit::Percent(span, p), - Token::Fraction(p) => Lit::Fractional(span, p), - Token::Str(token) => Lit::Str(span, { - if !token.terminated { - p.expected_at(span.end, "quote"); - } - resolve::resolve_string(token.string) - }), - _ => return None, +fn literal(p: &mut Parser) -> bool { + let peeked = if let Some(p) = p.peek() { + p + } else { + return false; }; - p.eat(); - Some(Expr::Lit(Box::new(lit))) + + match peeked { + // Basic values. + NodeKind::None + | NodeKind::Auto + | NodeKind::Int(_) + | NodeKind::Float(_) + | NodeKind::Bool(_) + | NodeKind::Fraction(_) + | NodeKind::Length(_, _) + | NodeKind::Angle(_, _) + | NodeKind::Percentage(_) => { + p.eat(); + } + NodeKind::Str(s) => { + p.eat(); + if !s.terminated { + p.expected_at("quote"); + } + } + _ => { + return false; + } + } + + true } /// Parse something that starts with a parenthesis, which can be either of: @@ -366,433 +397,508 @@ fn literal(p: &mut Parser) -> Option { /// - Dictionary literal /// - Parenthesized expression /// - Parameter list of closure expression -fn parenthesized(p: &mut Parser) -> Option { +fn parenthesized(p: &mut Parser) { + let offset = p.child_count(); + p.start(); p.start_group(Group::Paren, TokenMode::Code); - let colon = p.eat_if(Token::Colon); - let (items, has_comma) = collection(p); - let span = p.end_group(); + let colon = p.eat_if(NodeKind::Colon); + let kind = collection(p).0; + p.end_group(); + let token_count = p.child_count() - offset; - // Leading colon makes this a dictionary. + // Leading colon makes this a (empty) dictionary. 
if colon { - return Some(dict(p, items, span)); + p.lift(); + dict(p, token_count); + return; } // Arrow means this is a closure's parameter list. - if p.eat_if(Token::Arrow) { - let params = params(p, items); - let body = expr(p)?; - return Some(Expr::Closure(Box::new(ClosureExpr { - span: span.join(body.span()), - name: None, - params, - body: Rc::new(body), - }))); + if p.peek() == Some(NodeKind::Arrow) { + p.start_with(token_count); + params(p, 0, true); + p.end(NodeKind::ClosureParams); + + p.eat_assert(NodeKind::Arrow); + + expr(p); + + p.end_or_abort(NodeKind::Closure); + return; } // Find out which kind of collection this is. - Some(match items.as_slice() { - [] => array(p, items, span), - [CallArg::Pos(_)] if !has_comma => match items.into_iter().next() { - Some(CallArg::Pos(expr)) => Expr::Group(Box::new(GroupExpr { span, expr })), - _ => unreachable!(), - }, - [CallArg::Pos(_), ..] => array(p, items, span), - [CallArg::Named(_), ..] => dict(p, items, span), - [CallArg::Spread(expr), ..] => { - p.error(expr.span(), "spreading is not allowed here"); - return None; + match kind { + CollectionKind::Group => p.end(NodeKind::Group), + CollectionKind::PositionalCollection => { + p.lift(); + array(p, token_count); } - }) + CollectionKind::NamedCollection => { + p.lift(); + dict(p, token_count); + } + } +} + +/// The type of a collection. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +enum CollectionKind { + /// The collection is only one item and has no comma. + Group, + /// The collection starts with a positional and has more items or a trailing + /// comma. + PositionalCollection, + /// The collection starts with a named item. + NamedCollection, } /// Parse a collection. /// -/// Returns whether the literal contained any commas. -fn collection(p: &mut Parser) -> (Vec, bool) { - let mut items = vec![]; +/// Returns the length of the collection and whether the literal contained any +/// commas. 
+fn collection(p: &mut Parser) -> (CollectionKind, usize) { + let mut items = 0; + let mut kind = CollectionKind::PositionalCollection; + let mut seen_spread = false; let mut has_comma = false; let mut missing_coma = None; while !p.eof() { - if let Some(arg) = item(p) { - items.push(arg); + let item_kind = item(p); + if p.success() { + if items == 0 && item_kind == CollectionItemKind::Named { + kind = CollectionKind::NamedCollection; + } + + if item_kind == CollectionItemKind::ParameterSink { + seen_spread = true; + } + + items += 1; if let Some(pos) = missing_coma.take() { - p.expected_at(pos, "comma"); + p.expected_at_child(pos, "comma"); } if p.eof() { break; } - let behind = p.prev_end(); - if p.eat_if(Token::Comma) { + if p.eat_if(NodeKind::Comma) { has_comma = true; } else { - missing_coma = Some(behind); + missing_coma = Some(p.child_count()); } } } - (items, has_comma) -} - -/// Parse an expression or a named pair. -fn item(p: &mut Parser) -> Option { - if p.eat_if(Token::Dots) { - return expr(p).map(CallArg::Spread); + if !has_comma + && items == 1 + && !seen_spread + && kind == CollectionKind::PositionalCollection + { + kind = CollectionKind::Group; } - let first = expr(p)?; - if p.eat_if(Token::Colon) { - if let Expr::Ident(name) = first { - Some(CallArg::Named(Named { name: *name, expr: expr(p)? })) - } else { - p.error(first.span(), "expected identifier"); + (kind, items) +} + +/// What kind of item is this? +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +enum CollectionItemKind { + /// A named item. + Named, + /// An unnamed item. + Unnamed, + /// A parameter sink. + ParameterSink, +} + +/// Parse an expression or a named pair. Returns if this is a named pair. 
+fn item(p: &mut Parser) -> CollectionItemKind { + p.start(); + if p.eat_if(NodeKind::Dots) { + expr(p); + + p.end_or_abort(NodeKind::ParameterSink); + return CollectionItemKind::ParameterSink; + } + + expr(p); + + if p.may_lift_abort() { + return CollectionItemKind::Unnamed; + } + + if p.eat_if(NodeKind::Colon) { + let child = p.child(1).unwrap(); + if matches!(child.kind(), &NodeKind::Ident(_)) { expr(p); - None + p.end_or_abort(NodeKind::Named); + } else { + p.wrap( + 1, + NodeKind::Error(ErrorPosition::Full, "expected identifier".into()), + ); + + expr(p); + p.end(NodeKind::Named); + p.unsuccessful(); } + + CollectionItemKind::Named } else { - Some(CallArg::Pos(first)) + p.lift(); + CollectionItemKind::Unnamed } } /// Convert a collection into an array, producing errors for anything other than /// expressions. -fn array(p: &mut Parser, items: Vec, span: Span) -> Expr { - let iter = items.into_iter().filter_map(|item| match item { - CallArg::Pos(expr) => Some(expr), - CallArg::Named(_) => { - p.error(item.span(), "expected expression, found named pair"); - None - } - CallArg::Spread(_) => { - p.error(item.span(), "spreading is not allowed here"); - None - } - }); - Expr::Array(Box::new(ArrayExpr { span, items: iter.collect() })) +fn array(p: &mut Parser, items: usize) { + p.start_with(items); + p.filter_children( + 0, + |x| match x.kind() { + NodeKind::Named | NodeKind::ParameterSink => false, + _ => true, + }, + |kind| match kind { + NodeKind::Named => ( + ErrorPosition::Full, + "expected expression, found named pair".into(), + ), + NodeKind::ParameterSink => { + (ErrorPosition::Full, "spreading is not allowed here".into()) + } + _ => unreachable!(), + }, + ); + + p.end(NodeKind::Array) } /// Convert a collection into a dictionary, producing errors for anything other /// than named pairs. 
-fn dict(p: &mut Parser, items: Vec, span: Span) -> Expr { - let iter = items.into_iter().filter_map(|item| match item { - CallArg::Named(named) => Some(named), - CallArg::Pos(_) => { - p.error(item.span(), "expected named pair, found expression"); - None - } - CallArg::Spread(_) => { - p.error(item.span(), "spreading is not allowed here"); - None - } - }); - Expr::Dict(Box::new(DictExpr { span, items: iter.collect() })) +fn dict(p: &mut Parser, items: usize) { + p.start_with(items); + p.filter_children( + 0, + |x| { + x.kind() == &NodeKind::Named + || x.kind().is_parenthesis() + || x.kind() == &NodeKind::Comma + || x.kind() == &NodeKind::Colon + }, + |kind| match kind { + NodeKind::ParameterSink => { + (ErrorPosition::Full, "spreading is not allowed here".into()) + } + _ => ( + ErrorPosition::Full, + "expected named pair, found expression".into(), + ), + }, + ); + p.end(NodeKind::Dict); } /// Convert a collection into a list of parameters, producing errors for /// anything other than identifiers, spread operations and named pairs. -fn params(p: &mut Parser, items: Vec) -> Vec { - let iter = items.into_iter().filter_map(|item| match item { - CallArg::Pos(Expr::Ident(ident)) => Some(ClosureParam::Pos(*ident)), - CallArg::Named(named) => Some(ClosureParam::Named(named)), - CallArg::Spread(Expr::Ident(ident)) => Some(ClosureParam::Sink(*ident)), - _ => { - p.error(item.span(), "expected identifier"); - None - } - }); - iter.collect() -} - -/// Convert a collection into a list of identifiers, producing errors for -/// anything other than identifiers. 
-fn idents(p: &mut Parser, items: Vec) -> Vec { - let iter = items.into_iter().filter_map(|item| match item { - CallArg::Pos(Expr::Ident(ident)) => Some(*ident), - _ => { - p.error(item.span(), "expected identifier"); - None - } - }); - iter.collect() +fn params(p: &mut Parser, count: usize, allow_parens: bool) { + p.filter_children( + count, + |x| match x.kind() { + NodeKind::Named | NodeKind::Comma | NodeKind::Ident(_) => true, + NodeKind::ParameterSink => matches!( + x.children().last().map(|x| x.kind()), + Some(&NodeKind::Ident(_)) + ), + _ => false, + } + || (allow_parens && x.kind().is_parenthesis()), + |_| (ErrorPosition::Full, "expected identifier".into()), + ); } // Parse a template block: `[...]`. -fn template(p: &mut Parser) -> Expr { +fn template(p: &mut Parser) { + p.start(); p.start_group(Group::Bracket, TokenMode::Markup); - let tree = markup(p); - let span = p.end_group(); - Expr::Template(Box::new(TemplateExpr { span, body: tree })) + markup(p); + p.end_group(); + p.end(NodeKind::Template); } /// Parse a code block: `{...}`. -fn block(p: &mut Parser) -> Expr { +fn block(p: &mut Parser) { + p.start(); p.start_group(Group::Brace, TokenMode::Code); - let mut exprs = vec![]; while !p.eof() { p.start_group(Group::Stmt, TokenMode::Code); - if let Some(expr) = expr(p) { - exprs.push(expr); + expr(p); + if p.success() { if !p.eof() { - p.expected_at(p.prev_end(), "semicolon or line break"); + p.expected_at("semicolon or line break"); } } p.end_group(); // Forcefully skip over newlines since the group's contents can't. - p.eat_while(|t| matches!(t, Token::Space(_))); + p.eat_while(|t| matches!(t, NodeKind::Space(_))); } - let span = p.end_group(); - Expr::Block(Box::new(BlockExpr { span, exprs })) + p.end_group(); + p.end(NodeKind::Block); } /// Parse a function call. 
-fn call(p: &mut Parser, callee: Expr) -> Option { - let mut args = match p.peek_direct() { - Some(Token::LeftParen) => args(p), - Some(Token::LeftBracket) => CallArgs { - span: Span::at(p.id(), callee.span().end), - items: vec![], - }, +fn call(p: &mut Parser, callee: usize) { + p.start_with(callee); + match p.peek_direct() { + Some(NodeKind::LeftParen) | Some(NodeKind::LeftBracket) => args(p, true), _ => { - p.expected_at(p.prev_end(), "argument list"); - return None; + p.expected_at("argument list"); + p.may_end_abort(NodeKind::Call); + return; } }; - while p.peek_direct() == Some(Token::LeftBracket) { - let body = template(p); - args.items.push(CallArg::Pos(body)); - } - - Some(Expr::Call(Box::new(CallExpr { - span: p.span_from(callee.span().start), - callee, - args, - }))) + p.end(NodeKind::Call); } /// Parse the arguments to a function call. -fn args(p: &mut Parser) -> CallArgs { - p.start_group(Group::Paren, TokenMode::Code); - let items = collection(p).0; - let span = p.end_group(); - CallArgs { span, items } +fn args(p: &mut Parser, allow_template: bool) { + p.start(); + if !allow_template || p.peek_direct() == Some(&NodeKind::LeftParen) { + p.start_group(Group::Paren, TokenMode::Code); + collection(p); + p.end_group(); + } + + while allow_template && p.peek_direct() == Some(&NodeKind::LeftBracket) { + template(p); + } + + p.end(NodeKind::CallArgs); } /// Parse a with expression. -fn with_expr(p: &mut Parser, callee: Expr) -> Option { - if p.peek() == Some(Token::LeftParen) { - Some(Expr::With(Box::new(WithExpr { - span: p.span_from(callee.span().start), - callee, - args: args(p), - }))) +fn with_expr(p: &mut Parser, preserve: usize) { + p.start_with(preserve); + p.eat_assert(NodeKind::With); + + if p.peek() == Some(NodeKind::LeftParen) { + args(p, false); + p.end(NodeKind::WithExpr); } else { p.expected("argument list"); - None + p.may_end_abort(NodeKind::WithExpr); } } /// Parse a let expression. 
-fn let_expr(p: &mut Parser) -> Option { - let start = p.next_start(); - p.eat_assert(Token::Let); +fn let_expr(p: &mut Parser) { + p.start(); + p.eat_assert(NodeKind::Let); - let mut output = None; - if let Some(binding) = ident(p) { - let mut init = None; - - if p.eat_if(Token::With) { - init = with_expr(p, Expr::Ident(Box::new(binding.clone()))); - } else { - // If a parenthesis follows, this is a function definition. - let mut maybe_params = None; - if p.peek_direct() == Some(Token::LeftParen) { - p.start_group(Group::Paren, TokenMode::Code); - let items = collection(p).0; - maybe_params = Some(params(p, items)); - p.end_group(); - } - - if p.eat_if(Token::Eq) { - init = expr(p); - } else if maybe_params.is_some() { - // Function definitions must have a body. - p.expected_at(p.prev_end(), "body"); - } - - // Rewrite into a closure expression if it's a function definition. - if let Some(params) = maybe_params { - let body = init?; - init = Some(Expr::Closure(Box::new(ClosureExpr { - span: binding.span.join(body.span()), - name: Some(binding.clone()), - params, - body: Rc::new(body), - }))); - } - } - - output = Some(Expr::Let(Box::new(LetExpr { - span: p.span_from(start), - binding, - init, - }))); + let offset = p.child_count(); + ident(p); + if p.may_end_abort(NodeKind::LetExpr) { + return; } - output + if p.peek() == Some(NodeKind::With) { + with_expr(p, p.child_count() - offset); + } else { + // If a parenthesis follows, this is a function definition. + let has_params = if p.peek_direct() == Some(&NodeKind::LeftParen) { + p.start(); + p.start_group(Group::Paren, TokenMode::Code); + let offset = p.child_count(); + collection(p); + params(p, offset, true); + p.end_group(); + p.end(NodeKind::ClosureParams); + true + } else { + false + }; + + if p.eat_if(NodeKind::Eq) { + expr(p); + } else if has_params { + // Function definitions must have a body. + p.expected_at("body"); + } + + // Rewrite into a closure expression if it's a function definition. 
+ if has_params { + if p.may_end_abort(NodeKind::LetExpr) { + return; + } + + p.start_with(p.child_count() - offset); + p.end(NodeKind::Closure) + } + } + + p.end(NodeKind::LetExpr); } /// Parse an if expresion. -fn if_expr(p: &mut Parser) -> Option { - let start = p.next_start(); - p.eat_assert(Token::If); +fn if_expr(p: &mut Parser) { + p.start(); + p.eat_assert(NodeKind::If); - let mut output = None; - if let Some(condition) = expr(p) { - if let Some(if_body) = body(p) { - let mut else_body = None; - if p.eat_if(Token::Else) { - if p.peek() == Some(Token::If) { - else_body = if_expr(p); - } else { - else_body = body(p); - } - } + expr(p); + if p.may_end_abort(NodeKind::IfExpr) { + return; + } - output = Some(Expr::If(Box::new(IfExpr { - span: p.span_from(start), - condition, - if_body, - else_body, - }))); + body(p); + if p.may_end_abort(NodeKind::IfExpr) { + // Expected function body. + return; + } + + if p.eat_if(NodeKind::Else) { + if p.peek() == Some(NodeKind::If) { + if_expr(p); + } else { + body(p); } } - output + p.end(NodeKind::IfExpr); } /// Parse a while expresion. -fn while_expr(p: &mut Parser) -> Option { - let start = p.next_start(); - p.eat_assert(Token::While); +fn while_expr(p: &mut Parser) { + p.start(); + p.eat_assert(NodeKind::While); - let mut output = None; - if let Some(condition) = expr(p) { - if let Some(body) = body(p) { - output = Some(Expr::While(Box::new(WhileExpr { - span: p.span_from(start), - condition, - body, - }))); - } + expr(p); + + if p.may_end_abort(NodeKind::WhileExpr) { + return; } - output + body(p); + if !p.may_end_abort(NodeKind::WhileExpr) { + p.end(NodeKind::WhileExpr); + } } /// Parse a for expression. 
-fn for_expr(p: &mut Parser) -> Option { - let start = p.next_start(); - p.eat_assert(Token::For); +fn for_expr(p: &mut Parser) { + p.start(); + p.eat_assert(NodeKind::For); - let mut output = None; - if let Some(pattern) = for_pattern(p) { - if p.eat_expect(Token::In) { - if let Some(iter) = expr(p) { - if let Some(body) = body(p) { - output = Some(Expr::For(Box::new(ForExpr { - span: p.span_from(start), - pattern, - iter, - body, - }))); - } - } - } + for_pattern(p); + + if p.may_end_abort(NodeKind::ForExpr) { + return; } - output + if p.eat_expect(NodeKind::In) { + expr(p); + + if p.may_end_abort(NodeKind::ForExpr) { + return; + } + + body(p); + + if !p.may_end_abort(NodeKind::ForExpr) { + p.end(NodeKind::ForExpr); + } + } else { + p.unsuccessful(); + p.may_end_abort(NodeKind::ForExpr); + } } /// Parse a for loop pattern. -fn for_pattern(p: &mut Parser) -> Option { - let first = ident(p)?; - if p.eat_if(Token::Comma) { - if let Some(second) = ident(p) { - return Some(ForPattern::KeyValue(first, second)); +fn for_pattern(p: &mut Parser) { + p.start(); + ident(p); + + if p.may_end_abort(NodeKind::ForPattern) { + return; + } + + if p.peek() == Some(NodeKind::Comma) { + p.eat(); + + ident(p); + + if p.may_end_abort(NodeKind::ForPattern) { + return; } } - Some(ForPattern::Value(first)) + + p.end(NodeKind::ForPattern); } /// Parse an import expression. -fn import_expr(p: &mut Parser) -> Option { - let start = p.next_start(); - p.eat_assert(Token::Import); +fn import_expr(p: &mut Parser) { + p.start(); + p.eat_assert(NodeKind::Import); - let imports = if p.eat_if(Token::Star) { - // This is the wildcard scenario. - Imports::Wildcard - } else { + if !p.eat_if(NodeKind::Star) { // This is the list of identifiers scenario. 
+ p.start(); p.start_group(Group::Imports, TokenMode::Code); - let items = collection(p).0; - if items.is_empty() { - p.expected_at(p.prev_end(), "import items"); + let offset = p.child_count(); + let items = collection(p).1; + if items == 0 { + p.expected_at("import items"); } p.end_group(); - Imports::Idents(idents(p, items)) + + p.filter_children( + offset, + |n| matches!(n.kind(), NodeKind::Ident(_) | NodeKind::Comma), + |_| (ErrorPosition::Full, "expected identifier".into()), + ); + p.end(NodeKind::ImportItems); }; - let mut output = None; - if p.eat_expect(Token::From) { - if let Some(path) = expr(p) { - output = Some(Expr::Import(Box::new(ImportExpr { - span: p.span_from(start), - imports, - path, - }))); - } + if p.eat_expect(NodeKind::From) { + expr(p); } - output + p.end(NodeKind::ImportExpr); } /// Parse an include expression. -fn include_expr(p: &mut Parser) -> Option { - let start = p.next_start(); - p.eat_assert(Token::Include); +fn include_expr(p: &mut Parser) { + p.start(); + p.eat_assert(NodeKind::Include); - expr(p).map(|path| { - Expr::Include(Box::new(IncludeExpr { span: p.span_from(start), path })) - }) + expr(p); + p.end(NodeKind::IncludeExpr); } /// Parse an identifier. -fn ident(p: &mut Parser) -> Option { - if let Some(Token::Ident(string)) = p.peek() { - Some(Ident { - span: p.eat_span(), - string: string.into(), - }) +fn ident(p: &mut Parser) { + if let Some(NodeKind::Ident(_)) = p.peek() { + p.eat(); } else { p.expected("identifier"); - None + p.unsuccessful(); } } /// Parse a control flow body. 
-fn body(p: &mut Parser) -> Option { +fn body(p: &mut Parser) { match p.peek() { - Some(Token::LeftBracket) => Some(template(p)), - Some(Token::LeftBrace) => Some(block(p)), + Some(NodeKind::LeftBracket) => template(p), + Some(NodeKind::LeftBrace) => block(p), _ => { - p.expected_at(p.prev_end(), "body"); - None + p.expected_at("body"); + p.unsuccessful(); } } } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 347d6f715..f62e882af 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,29 +1,34 @@ use std::ops::Range; +use std::rc::Rc; use super::{TokenMode, Tokens}; -use crate::diag::Error; use crate::source::{SourceFile, SourceId}; -use crate::syntax::{IntoSpan, Pos, Span, Token}; +use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; +use crate::util::EcoString; /// A convenient token-based parser. pub struct Parser<'s> { /// The parsed file. source: &'s SourceFile, - /// Parsing errors. - errors: Vec, /// An iterator over the source tokens. tokens: Tokens<'s>, /// The stack of open groups. groups: Vec, /// The next token. - next: Option>, + next: Option, /// The peeked token. /// (Same as `next` except if we are at the end of group, then `None`). - peeked: Option>, + peeked: Option, /// The end index of the last (non-whitespace if in code mode) token. prev_end: usize, /// The start index of the peeked token. next_start: usize, + /// A stack of outer children vectors. + stack: Vec>, + /// The children of the currently built node. + children: Vec, + /// Whether the last parsing step was successful. + success: bool, } /// A logical group of tokens, e.g. `[...]`. @@ -32,9 +37,6 @@ struct GroupEntry { /// For example, a [`Group::Paren`] will be ended by /// [`Token::RightParen`]. pub kind: Group, - /// The start index of the group. Used by `Parser::end_group` to return the - /// group's full span. - pub start: usize, /// The mode the parser was in _before_ the group started (to which we go /// back once the group ends). 
pub prev_mode: TokenMode, @@ -60,51 +62,204 @@ pub enum Group { impl<'s> Parser<'s> { /// Create a new parser for the source string. pub fn new(source: &'s SourceFile) -> Self { - let mut tokens = Tokens::new(source.src(), TokenMode::Markup); + let mut tokens = Tokens::new(source, TokenMode::Markup); let next = tokens.next(); Self { source, - errors: vec![], tokens, groups: vec![], - next, + next: next.clone(), peeked: next, prev_end: 0, next_start: 0, + stack: vec![], + children: vec![], + success: true, } } - /// Finish parsing and return all errors. - pub fn finish(self) -> Vec { - self.errors - } - /// The id of the parsed source file. pub fn id(&self) -> SourceId { self.source.id() } + /// Start a nested node. + /// + /// Each start call has to be matched with a call to `end`, + /// `end_with_custom_children`, `lift`, `abort`, or `end_or_abort`. + pub fn start(&mut self) { + self.stack.push(std::mem::take(&mut self.children)); + } + + /// Start a nested node, preserving a number of the current children. + pub fn start_with(&mut self, preserve: usize) { + let preserved = self.children.drain(self.children.len() - preserve ..).collect(); + self.stack.push(std::mem::replace(&mut self.children, preserved)); + } + + /// Filter the last children using the given predicate. + pub fn filter_children(&mut self, count: usize, f: F, error: G) + where + F: Fn(&Green) -> bool, + G: Fn(&NodeKind) -> (ErrorPosition, EcoString), + { + for child in &mut self.children[count ..] 
{ + if !((self.tokens.mode() != TokenMode::Code + || Self::skip_type_ext(child.kind(), false)) + || child.kind().is_error() + || f(&child)) + { + let (pos, msg) = error(child.kind()); + let inner = std::mem::take(child); + *child = + GreenNode::with_child(NodeKind::Error(pos, msg), inner.len(), inner) + .into(); + } + } + } + + pub fn child(&self, child: usize) -> Option<&Green> { + self.node_index_from_back(child).map(|i| &self.children[i]) + } + + fn node_index_from_back(&self, child: usize) -> Option { + let len = self.children.len(); + let code = self.tokens.mode() == TokenMode::Code; + let mut seen = 0; + for x in (0 .. len).rev() { + if self.skip_type(self.children[x].kind()) && code { + continue; + } + if seen == child { + return Some(x); + } + seen += 1; + } + + None + } + + /// End the current node as a node of given `kind`. + pub fn end(&mut self, kind: NodeKind) { + let outer = self.stack.pop().unwrap(); + let mut children = std::mem::replace(&mut self.children, outer); + + // have trailing whitespace continue to sit in self.children in code + // mode. + let mut remains = vec![]; + if self.tokens.mode() == TokenMode::Code { + let len = children.len(); + for n in (0 .. len).rev() { + if !self.skip_type(&children[n].kind()) { + break; + } + + remains.push(children.pop().unwrap()); + } + remains.reverse(); + } + + let len = children.iter().map(|c| c.len()).sum(); + self.children + .push(GreenNode::with_children(kind, len, children.into_iter()).into()); + self.children.extend(remains); + self.success = true; + } + + /// End the current node as a node of given `kind`, and start a new node + /// with the ended node as a first child. The function returns how many + /// children the stack frame had before and how many were appended (accounts + /// for trivia). 
+ pub fn end_and_start_with(&mut self, kind: NodeKind) -> (usize, usize) { + let stack_offset = self.stack.last().unwrap().len(); + self.end(kind); + let diff = self.children.len() - stack_offset; + self.start_with(diff); + (stack_offset, diff) + } + + pub fn wrap(&mut self, index: usize, kind: NodeKind) { + let index = self.node_index_from_back(index).unwrap(); + let child = std::mem::take(&mut self.children[index]); + let item = GreenNode::with_child(kind, child.len(), child); + self.children[index] = item.into(); + } + + pub fn convert(&mut self, kind: NodeKind) { + self.start(); + self.eat(); + self.end(kind); + } + + /// End the current node and undo its existence, inling all accumulated + /// children into its parent. + pub fn lift(&mut self) { + let outer = self.stack.pop().unwrap(); + let children = std::mem::replace(&mut self.children, outer); + self.children.extend(children); + self.success = true; + } + + /// End the current node and undo its existence, deleting all accumulated + /// children. + pub fn abort(&mut self, msg: impl Into) { + self.end(NodeKind::Error(ErrorPosition::Full, msg.into().into())); + self.success = false; + } + + pub fn may_lift_abort(&mut self) -> bool { + if !self.success { + self.lift(); + self.success = false; + true + } else { + false + } + } + + pub fn may_end_abort(&mut self, kind: NodeKind) -> bool { + if !self.success { + self.end(kind); + self.success = false; + true + } else { + false + } + } + + /// End the current node as a node of given `kind` if the last parse was + /// successful, otherwise, abort. + pub fn end_or_abort(&mut self, kind: NodeKind) -> bool { + if self.success { + self.end(kind); + true + } else { + self.may_end_abort(kind); + false + } + } + + pub fn finish(&mut self) -> Rc { + if let Green::Node(n) = self.children.pop().unwrap() { + n + } else { + panic!() + } + } + /// Whether the end of the source string or group is reached. 
pub fn eof(&self) -> bool { self.peek().is_none() } - /// Consume the next token. - pub fn eat(&mut self) -> Option> { + pub fn eat(&mut self) -> Option { let token = self.peek()?; self.bump(); Some(token) } - /// Eat the next token and return its source range. - pub fn eat_span(&mut self) -> Span { - let start = self.next_start(); - self.eat(); - Span::new(self.id(), start, self.prev_end()) - } - /// Consume the next token if it is the given one. - pub fn eat_if(&mut self, t: Token) -> bool { + pub fn eat_if(&mut self, t: NodeKind) -> bool { if self.peek() == Some(t) { self.bump(); true @@ -116,7 +271,7 @@ impl<'s> Parser<'s> { /// Consume the next token if the closure maps it a to `Some`-variant. pub fn eat_map(&mut self, f: F) -> Option where - F: FnOnce(Token<'s>) -> Option, + F: FnOnce(NodeKind) -> Option, { let token = self.peek()?; let mapped = f(token); @@ -128,16 +283,16 @@ impl<'s> Parser<'s> { /// Consume the next token if it is the given one and produce an error if /// not. - pub fn eat_expect(&mut self, t: Token) -> bool { - let eaten = self.eat_if(t); + pub fn eat_expect(&mut self, t: NodeKind) -> bool { + let eaten = self.eat_if(t.clone()); if !eaten { - self.expected_at(self.prev_end(), t.name()); + self.expected_at(&t.to_string()); } eaten } /// Consume the next token, debug-asserting that it is one of the given ones. - pub fn eat_assert(&mut self, t: Token) { + pub fn eat_assert(&mut self, t: NodeKind) { let next = self.eat(); debug_assert_eq!(next, Some(t)); } @@ -145,7 +300,7 @@ impl<'s> Parser<'s> { /// Consume tokens while the condition is true. pub fn eat_while(&mut self, mut f: F) where - F: FnMut(Token<'s>) -> bool, + F: FnMut(NodeKind) -> bool, { while self.peek().map_or(false, |t| f(t)) { self.eat(); @@ -153,42 +308,25 @@ impl<'s> Parser<'s> { } /// Peek at the next token without consuming it. 
- pub fn peek(&self) -> Option> { - self.peeked + pub fn peek(&self) -> Option { + self.peeked.clone() } /// Peek at the next token if it follows immediately after the last one /// without any whitespace in between. - pub fn peek_direct(&self) -> Option> { + pub fn peek_direct(&self) -> Option<&NodeKind> { if self.next_start() == self.prev_end() { - self.peeked + self.peeked.as_ref() } else { None } } - /// Peek at the span of the next token. - /// - /// Has length zero if `peek()` returns `None`. - pub fn peek_span(&self) -> Span { - Span::new(self.id(), self.next_start(), self.next_end()) - } - /// Peek at the source of the next token. pub fn peek_src(&self) -> &'s str { self.get(self.next_start() .. self.next_end()) } - /// Checks whether the next token fulfills a condition. - /// - /// Returns `false` if there is no next token. - pub fn check(&self, f: F) -> bool - where - F: FnOnce(Token<'s>) -> bool, - { - self.peek().map_or(false, f) - } - /// The byte index at which the last token ended. /// /// Refers to the end of the last _non-whitespace_ token in code mode. @@ -219,11 +357,6 @@ impl<'s> Parser<'s> { self.source.get(range).unwrap() } - /// The span from `start` to [`self.prev_end()`](Self::prev_end). - pub fn span_from(&self, start: impl Into) -> Span { - Span::new(self.id(), start, self.prev_end()) - } - /// Continue parsing in a group. /// /// When the end delimiter of the group is reached, all subsequent calls to @@ -232,19 +365,15 @@ impl<'s> Parser<'s> { /// /// This panics if the next token does not start the given group. 
pub fn start_group(&mut self, kind: Group, mode: TokenMode) { - self.groups.push(GroupEntry { - kind, - start: self.next_start(), - prev_mode: self.tokens.mode(), - }); + self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() }); self.tokens.set_mode(mode); self.repeek(); match kind { - Group::Paren => self.eat_assert(Token::LeftParen), - Group::Bracket => self.eat_assert(Token::LeftBracket), - Group::Brace => self.eat_assert(Token::LeftBrace), + Group::Paren => self.eat_assert(NodeKind::LeftParen), + Group::Bracket => self.eat_assert(NodeKind::LeftBracket), + Group::Brace => self.eat_assert(NodeKind::LeftBrace), Group::Stmt => {} Group::Expr => {} Group::Imports => {} @@ -254,7 +383,7 @@ impl<'s> Parser<'s> { /// End the parsing of a group. /// /// This panics if no group was started. - pub fn end_group(&mut self) -> Span { + pub fn end_group(&mut self) { let prev_mode = self.tokens.mode(); let group = self.groups.pop().expect("no started group"); self.tokens.set_mode(group.prev_mode); @@ -264,83 +393,125 @@ impl<'s> Parser<'s> { // Eat the end delimiter if there is one. if let Some((end, required)) = match group.kind { - Group::Paren => Some((Token::RightParen, true)), - Group::Bracket => Some((Token::RightBracket, true)), - Group::Brace => Some((Token::RightBrace, true)), - Group::Stmt => Some((Token::Semicolon, false)), + Group::Paren => Some((NodeKind::RightParen, true)), + Group::Bracket => Some((NodeKind::RightBracket, true)), + Group::Brace => Some((NodeKind::RightBrace, true)), + Group::Stmt => Some((NodeKind::Semicolon, false)), Group::Expr => None, Group::Imports => None, } { - if self.next == Some(end) { + if self.next == Some(end.clone()) { // Bump the delimeter and return. No need to rescan in this case. self.bump(); rescan = false; } else if required { - self.error( - self.next_start() .. 
self.next_start(), - format!("expected {}", end.name()), - ); + self.start(); + self.abort(format!("expected {}", end.to_string())); } } // Rescan the peeked token if the mode changed. if rescan { self.tokens.jump(self.prev_end()); - self.bump(); + + if prev_mode == TokenMode::Code { + let len = self.children.len(); + for n in (0 .. len).rev() { + if !self.skip_type(self.children[n].kind()) { + break; + } + + self.children.pop(); + } + } + + self.fast_forward(); } - - Span::new(self.id(), group.start, self.prev_end()) - } - - /// Add an error with location and message. - pub fn error(&mut self, span: impl IntoSpan, message: impl Into) { - self.errors.push(Error::new(span.into_span(self.id()), message)); } /// Add an error that `what` was expected at the given span. - pub fn expected_at(&mut self, span: impl IntoSpan, what: &str) { - self.error(span, format!("expected {}", what)); + pub fn expected_at(&mut self, what: &str) { + let mut found = self.children.len(); + for (i, node) in self.children.iter().enumerate().rev() { + if !self.skip_type(node.kind()) { + break; + } + found = i; + } + + self.expected_at_child(found, what); + } + + /// Add an error that `what` was expected at the given child index. + pub fn expected_at_child(&mut self, index: usize, what: &str) { + self.children.insert( + index, + GreenData::new( + NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), + 0, + ) + .into(), + ); } /// Eat the next token and add an error that it is not the expected `thing`. pub fn expected(&mut self, what: &str) { - let before = self.next_start(); + self.start(); if let Some(found) = self.eat() { - let after = self.prev_end(); - self.error( - before .. 
after, - format!("expected {}, found {}", what, found.name()), - ); + self.abort(format!("expected {}, found {}", what, found.to_string())) } else { - self.expected_at(self.next_start(), what); + self.lift(); + self.expected_at(what); } } /// Eat the next token and add an error that it is unexpected. pub fn unexpected(&mut self) { - let before = self.next_start(); + self.start(); if let Some(found) = self.eat() { - let after = self.prev_end(); - self.error(before .. after, format!("unexpected {}", found.name())); + self.abort(format!("unexpected {}", found.to_string())) + } else { + self.abort("unexpected end of file") } } + pub fn skip_type_ext(token: &NodeKind, stop_at_newline: bool) -> bool { + match token { + NodeKind::Space(n) => n < &1 || !stop_at_newline, + NodeKind::LineComment => true, + NodeKind::BlockComment => true, + _ => false, + } + } + + fn skip_type(&self, token: &NodeKind) -> bool { + Self::skip_type_ext(token, self.stop_at_newline()) + } + /// Move to the next token. fn bump(&mut self) { - self.prev_end = self.tokens.index().into(); + self.children.push( + GreenData::new( + self.next.clone().unwrap(), + self.tokens.index() - self.next_start, + ) + .into(), + ); + + self.fast_forward(); + } + + pub fn fast_forward(&mut self) { + if !self.next.as_ref().map_or(false, |x| self.skip_type(x)) { + self.prev_end = self.tokens.index().into(); + } self.next_start = self.tokens.index().into(); self.next = self.tokens.next(); if self.tokens.mode() == TokenMode::Code { // Skip whitespace and comments. 
- while match self.next { - Some(Token::Space(n)) => n < 1 || !self.stop_at_newline(), - Some(Token::LineComment(_)) => true, - Some(Token::BlockComment(_)) => true, - _ => false, - } { - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); + while self.next.as_ref().map_or(false, |x| self.skip_type(x)) { + self.bump(); } } @@ -349,19 +520,19 @@ impl<'s> Parser<'s> { /// Take another look at the next token to recheck whether it ends a group. fn repeek(&mut self) { - self.peeked = self.next; - let token = match self.next { + self.peeked = self.next.clone(); + let token = match self.next.as_ref() { Some(token) => token, None => return, }; if match token { - Token::RightParen => self.inside(Group::Paren), - Token::RightBracket => self.inside(Group::Bracket), - Token::RightBrace => self.inside(Group::Brace), - Token::Semicolon => self.inside(Group::Stmt), - Token::From => self.inside(Group::Imports), - Token::Space(n) => n >= 1 && self.stop_at_newline(), + NodeKind::RightParen => self.inside(Group::Paren), + NodeKind::RightBracket => self.inside(Group::Bracket), + NodeKind::RightBrace => self.inside(Group::Brace), + NodeKind::Semicolon => self.inside(Group::Stmt), + NodeKind::From => self.inside(Group::Imports), + NodeKind::Space(n) => n > &0 && self.stop_at_newline(), _ => false, } { self.peeked = None; @@ -380,4 +551,22 @@ impl<'s> Parser<'s> { fn inside(&self, kind: Group) -> bool { self.groups.iter().any(|g| g.kind == kind) } + + pub fn last_child(&self) -> Option<&Green> { + self.children.last() + } + + pub fn success(&mut self) -> bool { + let s = self.success; + self.success = true; + s + } + + pub fn unsuccessful(&mut self) { + self.success = false; + } + + pub fn child_count(&self) -> usize { + self.children.len() + } } diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index 1b3238472..c59c3bb17 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -1,5 +1,5 @@ use super::{is_newline, Scanner}; -use 
crate::syntax::{Ident, RawNode, Span}; +use crate::syntax::RawToken; use crate::util::EcoString; /// Resolve all escape sequences in a string. @@ -48,21 +48,28 @@ pub fn resolve_hex(sequence: &str) -> Option { } /// Resolve the language tag and trims the raw text. -pub fn resolve_raw(span: Span, column: usize, backticks: usize, text: &str) -> RawNode { +pub fn resolve_raw( + column: usize, + backticks: u8, + text: &str, + terminated: bool, +) -> RawToken { if backticks > 1 { let (tag, inner) = split_at_lang_tag(text); let (text, block) = trim_and_split_raw(column, inner); - RawNode { - span, - lang: Ident::new(tag, span.with_end(span.start + tag.len())), + RawToken { + lang: Some(tag.into()), text: text.into(), + backticks, + terminated, block, } } else { - RawNode { - span, + RawToken { lang: None, text: split_lines(text).join("\n").into(), + backticks, + terminated, block: false, } } @@ -140,7 +147,6 @@ fn split_lines(text: &str) -> Vec<&str> { #[cfg(test)] #[rustfmt::skip] mod tests { - use crate::syntax::Span; use super::*; #[test] @@ -175,8 +181,8 @@ mod tests { test("typst\n it!", "typst", "\n it!"); test("typst\n it!", "typst", "\n it!"); test("abc`", "abc", "`"); - test(" hi", "", " hi"); - test("`", "", "`"); + test(" hi", "", " hi"); + test("`", "", "`"); } #[test] @@ -184,13 +190,13 @@ mod tests { #[track_caller] fn test( column: usize, - backticks: usize, + backticks: u8, raw: &str, lang: Option<&str>, text: &str, block: bool, ) { - let node = resolve_raw(Span::detached(), column, backticks, raw); + let node = resolve_raw(column, backticks, raw, true); assert_eq!(node.lang.as_deref(), lang); assert_eq!(node.text, text); assert_eq!(node.block, block); @@ -204,15 +210,15 @@ mod tests { // More than one backtick with lang tag. 
test(0, 2, "js alert()", Some("js"), "alert()", false); test(0, 3, "py quit(\n\n)", Some("py"), "quit(\n\n)", true); - test(0, 2, "♥", None, "", false); + test(0, 2, "♥", Some("♥"), "", false); // Trimming of whitespace (tested more thoroughly in separate test). - test(0, 2, " a", None, "a", false); - test(0, 2, " a", None, " a", false); - test(0, 2, " \na", None, "a", true); + test(0, 2, " a", Some(""), "a", false); + test(0, 2, " a", Some(""), " a", false); + test(0, 2, " \na", Some(""), "a", true); // Dedenting - test(2, 3, " def foo():\n bar()", None, "def foo():\n bar()", true); + test(2, 3, " def foo():\n bar()", Some(""), "def foo():\n bar()", true); } #[test] diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 5f9694528..19d0d77bb 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -1,9 +1,13 @@ -use super::{is_newline, Scanner}; +use super::{is_newline, resolve_raw, Scanner}; use crate::geom::{AngularUnit, LengthUnit}; +use crate::parse::resolve::{resolve_hex, resolve_string}; +use crate::source::SourceFile; use crate::syntax::*; +use crate::util::EcoString; /// An iterator over the tokens of a string of source code. pub struct Tokens<'s> { + source: &'s SourceFile, s: Scanner<'s>, mode: TokenMode, } @@ -20,8 +24,12 @@ pub enum TokenMode { impl<'s> Tokens<'s> { /// Create a new token iterator with the given mode. #[inline] - pub fn new(src: &'s str, mode: TokenMode) -> Self { - Self { s: Scanner::new(src), mode } + pub fn new(source: &'s SourceFile, mode: TokenMode) -> Self { + Self { + s: Scanner::new(source.src()), + source, + mode, + } } /// Get the current token mode. @@ -59,7 +67,7 @@ impl<'s> Tokens<'s> { } impl<'s> Iterator for Tokens<'s> { - type Item = Token<'s>; + type Item = NodeKind; /// Parse the next token in the source code. #[inline] @@ -68,19 +76,21 @@ impl<'s> Iterator for Tokens<'s> { let c = self.s.eat()?; Some(match c { // Blocks and templates. 
- '[' => Token::LeftBracket, - ']' => Token::RightBracket, - '{' => Token::LeftBrace, - '}' => Token::RightBrace, + '[' => NodeKind::LeftBracket, + ']' => NodeKind::RightBracket, + '{' => NodeKind::LeftBrace, + '}' => NodeKind::RightBrace, // Whitespace. - ' ' if self.s.check_or(true, |c| !c.is_whitespace()) => Token::Space(0), + ' ' if self.s.check_or(true, |c| !c.is_whitespace()) => NodeKind::Space(0), c if c.is_whitespace() => self.whitespace(), // Comments with special case for URLs. '/' if self.s.eat_if('*') => self.block_comment(), '/' if !self.maybe_in_url() && self.s.eat_if('/') => self.line_comment(), - '*' if self.s.eat_if('/') => Token::Invalid(self.s.eaten_from(start)), + '*' if self.s.eat_if('/') => { + NodeKind::Error(ErrorPosition::Full, self.s.eaten_from(start).into()) + } // Other things. _ => match self.mode { @@ -93,7 +103,7 @@ impl<'s> Iterator for Tokens<'s> { impl<'s> Tokens<'s> { #[inline] - fn markup(&mut self, start: usize, c: char) -> Token<'s> { + fn markup(&mut self, start: usize, c: char) -> NodeKind { match c { // Escape sequences. '\\' => self.backslash(), @@ -102,13 +112,15 @@ impl<'s> Tokens<'s> { '#' => self.hash(), // Markup. - '~' => Token::Tilde, - '*' => Token::Star, - '_' => Token::Underscore, + '~' => NodeKind::NonBreakingSpace, + '*' => NodeKind::Strong, + '_' => NodeKind::Emph, '`' => self.raw(), '$' => self.math(), - '-' => self.hyph(start), - '=' if self.s.check_or(true, |c| c == '=' || c.is_whitespace()) => Token::Eq, + '-' => self.hyph(), + '=' if self.s.check_or(true, |c| c == '=' || c.is_whitespace()) => { + NodeKind::Eq + } c if c == '.' || c.is_ascii_digit() => self.numbering(start, c), // Plain text. @@ -116,35 +128,35 @@ impl<'s> Tokens<'s> { } } - fn code(&mut self, start: usize, c: char) -> Token<'s> { + fn code(&mut self, start: usize, c: char) -> NodeKind { match c { // Parens. - '(' => Token::LeftParen, - ')' => Token::RightParen, + '(' => NodeKind::LeftParen, + ')' => NodeKind::RightParen, // Length two. 
- '=' if self.s.eat_if('=') => Token::EqEq, - '!' if self.s.eat_if('=') => Token::ExclEq, - '<' if self.s.eat_if('=') => Token::LtEq, - '>' if self.s.eat_if('=') => Token::GtEq, - '+' if self.s.eat_if('=') => Token::PlusEq, - '-' if self.s.eat_if('=') => Token::HyphEq, - '*' if self.s.eat_if('=') => Token::StarEq, - '/' if self.s.eat_if('=') => Token::SlashEq, - '.' if self.s.eat_if('.') => Token::Dots, - '=' if self.s.eat_if('>') => Token::Arrow, + '=' if self.s.eat_if('=') => NodeKind::EqEq, + '!' if self.s.eat_if('=') => NodeKind::ExclEq, + '<' if self.s.eat_if('=') => NodeKind::LtEq, + '>' if self.s.eat_if('=') => NodeKind::GtEq, + '+' if self.s.eat_if('=') => NodeKind::PlusEq, + '-' if self.s.eat_if('=') => NodeKind::HyphEq, + '*' if self.s.eat_if('=') => NodeKind::StarEq, + '/' if self.s.eat_if('=') => NodeKind::SlashEq, + '.' if self.s.eat_if('.') => NodeKind::Dots, + '=' if self.s.eat_if('>') => NodeKind::Arrow, // Length one. - ',' => Token::Comma, - ';' => Token::Semicolon, - ':' => Token::Colon, - '+' => Token::Plus, - '-' => Token::Hyph, - '*' => Token::Star, - '/' => Token::Slash, - '=' => Token::Eq, - '<' => Token::Lt, - '>' => Token::Gt, + ',' => NodeKind::Comma, + ';' => NodeKind::Semicolon, + ':' => NodeKind::Colon, + '+' => NodeKind::Plus, + '-' => NodeKind::Minus, + '*' => NodeKind::Star, + '/' => NodeKind::Slash, + '=' => NodeKind::Eq, + '<' => NodeKind::Lt, + '>' => NodeKind::Gt, // Identifiers. c if is_id_start(c) => self.ident(start), @@ -159,12 +171,12 @@ impl<'s> Tokens<'s> { // Strings. '"' => self.string(), - _ => Token::Invalid(self.s.eaten_from(start)), + _ => NodeKind::Error(ErrorPosition::Full, self.s.eaten_from(start).into()), } } #[inline] - fn text(&mut self, start: usize) -> Token<'s> { + fn text(&mut self, start: usize) -> NodeKind { macro_rules! 
table { ($($c:literal)|*) => {{ let mut t = [false; 128]; @@ -186,10 +198,10 @@ impl<'s> Tokens<'s> { TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace()) }); - Token::Text(self.s.eaten_from(start)) + NodeKind::Text(resolve_string(self.s.eaten_from(start))) } - fn whitespace(&mut self) -> Token<'s> { + fn whitespace(&mut self) -> NodeKind { self.s.uneat(); // Count the number of newlines. @@ -208,10 +220,10 @@ impl<'s> Tokens<'s> { } } - Token::Space(newlines) + NodeKind::Space(newlines) } - fn backslash(&mut self) -> Token<'s> { + fn backslash(&mut self) -> NodeKind { if let Some(c) = self.s.peek() { match c { // Backslash and comments. @@ -220,61 +232,61 @@ impl<'s> Tokens<'s> { '[' | ']' | '{' | '}' | '#' | // Markup. '*' | '_' | '=' | '~' | '`' | '$' => { - let start = self.s.index(); self.s.eat_assert(c); - Token::Text(&self.s.eaten_from(start)) + NodeKind::Text(c.into()) } 'u' if self.s.rest().starts_with("u{") => { self.s.eat_assert('u'); self.s.eat_assert('{'); - Token::UnicodeEscape(UnicodeEscapeToken { - // Allow more than `ascii_hexdigit` for better error recovery. 
- sequence: self.s.eat_while(|c| c.is_ascii_alphanumeric()), - terminated: self.s.eat_if('}'), + let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into(); + NodeKind::UnicodeEscape(UnicodeEscapeToken { + character: resolve_hex(&sequence), + sequence, + terminated: self.s.eat_if('}') }) } - c if c.is_whitespace() => Token::Backslash, - _ => Token::Text("\\"), + c if c.is_whitespace() => NodeKind::Linebreak, + _ => NodeKind::Text("\\".into()), } } else { - Token::Backslash + NodeKind::Linebreak } } #[inline] - fn hash(&mut self) -> Token<'s> { + fn hash(&mut self) -> NodeKind { if self.s.check_or(false, is_id_start) { let read = self.s.eat_while(is_id_continue); if let Some(keyword) = keyword(read) { keyword } else { - Token::Ident(read) + NodeKind::Ident(read.into()) } } else { - Token::Text("#") + NodeKind::Text("#".into()) } } - fn hyph(&mut self, start: usize) -> Token<'s> { + fn hyph(&mut self) -> NodeKind { if self.s.eat_if('-') { if self.s.eat_if('-') { - Token::HyphHyphHyph + NodeKind::EmDash } else { - Token::HyphHyph + NodeKind::EnDash } } else if self.s.check_or(true, char::is_whitespace) { - Token::Hyph + NodeKind::ListBullet } else { - Token::Text(self.s.eaten_from(start)) + NodeKind::Text("-".into()) } } - fn numbering(&mut self, start: usize, c: char) -> Token<'s> { + fn numbering(&mut self, start: usize, c: char) -> NodeKind { let number = if c != '.' 
{ self.s.eat_while(|c| c.is_ascii_digit()); let read = self.s.eaten_from(start); if !self.s.eat_if('.') { - return Token::Text(read); + return NodeKind::Text(self.s.eaten_from(start).into()); } read.parse().ok() } else { @@ -282,21 +294,28 @@ impl<'s> Tokens<'s> { }; if self.s.check_or(true, char::is_whitespace) { - Token::Numbering(number) + NodeKind::EnumNumbering(number) } else { - Token::Text(self.s.eaten_from(start)) + NodeKind::Text(self.s.eaten_from(start).into()) } } - fn raw(&mut self) -> Token<'s> { + fn raw(&mut self) -> NodeKind { + let column = self.source.byte_to_column(self.s.index() - 1).unwrap(); let mut backticks = 1; - while self.s.eat_if('`') { + while self.s.eat_if('`') && backticks < u8::MAX { backticks += 1; } // Special case for empty inline block. if backticks == 2 { - return Token::Raw(RawToken { text: "", backticks: 1, terminated: true }); + return NodeKind::Raw(RawToken { + text: EcoString::new(), + lang: None, + backticks: 1, + terminated: true, + block: false, + }); } let start = self.s.index(); @@ -311,16 +330,17 @@ impl<'s> Tokens<'s> { } let terminated = found == backticks; - let end = self.s.index() - if terminated { found } else { 0 }; + let end = self.s.index() - if terminated { found as usize } else { 0 }; - Token::Raw(RawToken { - text: self.s.get(start .. end), + NodeKind::Raw(resolve_raw( + column, backticks, + self.s.get(start .. end).into(), terminated, - }) + )) } - fn math(&mut self) -> Token<'s> { + fn math(&mut self) -> NodeKind { let mut display = false; if self.s.eat_if('[') { display = true; @@ -350,25 +370,25 @@ impl<'s> Tokens<'s> { (true, true) => 2, }; - Token::Math(MathToken { - formula: self.s.get(start .. end), + NodeKind::Math(MathToken { + formula: self.s.get(start .. 
end).into(), display, terminated, }) } - fn ident(&mut self, start: usize) -> Token<'s> { + fn ident(&mut self, start: usize) -> NodeKind { self.s.eat_while(is_id_continue); match self.s.eaten_from(start) { - "none" => Token::None, - "auto" => Token::Auto, - "true" => Token::Bool(true), - "false" => Token::Bool(false), - id => keyword(id).unwrap_or(Token::Ident(id)), + "none" => NodeKind::None, + "auto" => NodeKind::Auto, + "true" => NodeKind::Bool(true), + "false" => NodeKind::Bool(false), + id => keyword(id).unwrap_or(NodeKind::Ident(id.into())), } } - fn number(&mut self, start: usize, c: char) -> Token<'s> { + fn number(&mut self, start: usize, c: char) -> NodeKind { // Read the first part (integer or fractional depending on `first`). self.s.eat_while(|c| c.is_ascii_digit()); @@ -380,7 +400,9 @@ impl<'s> Tokens<'s> { // Read the exponent. if self.s.eat_if('e') || self.s.eat_if('E') { - let _ = self.s.eat_if('+') || self.s.eat_if('-'); + if !self.s.eat_if('+') { + self.s.eat_if('-'); + } self.s.eat_while(|c| c.is_ascii_digit()); } @@ -396,55 +418,53 @@ impl<'s> Tokens<'s> { // Find out whether it is a simple number. if suffix.is_empty() { - if let Ok(int) = number.parse::() { - return Token::Int(int); - } else if let Ok(float) = number.parse::() { - return Token::Float(float); + if let Ok(i) = number.parse::() { + return NodeKind::Int(i); } } - // Otherwise parse into the fitting numeric type. 
- let build = match suffix { - "%" => Token::Percent, - "fr" => Token::Fraction, - "pt" => |x| Token::Length(x, LengthUnit::Pt), - "mm" => |x| Token::Length(x, LengthUnit::Mm), - "cm" => |x| Token::Length(x, LengthUnit::Cm), - "in" => |x| Token::Length(x, LengthUnit::In), - "rad" => |x| Token::Angle(x, AngularUnit::Rad), - "deg" => |x| Token::Angle(x, AngularUnit::Deg), - _ => return Token::Invalid(all), - }; - - if let Ok(float) = number.parse::() { - build(float) + if let Ok(f) = number.parse::() { + match suffix { + "" => NodeKind::Float(f), + "%" => NodeKind::Percentage(f), + "fr" => NodeKind::Fraction(f), + "pt" => NodeKind::Length(f, LengthUnit::Pt), + "mm" => NodeKind::Length(f, LengthUnit::Mm), + "cm" => NodeKind::Length(f, LengthUnit::Cm), + "in" => NodeKind::Length(f, LengthUnit::In), + "deg" => NodeKind::Angle(f, AngularUnit::Deg), + "rad" => NodeKind::Angle(f, AngularUnit::Rad), + _ => { + return NodeKind::Error(ErrorPosition::Full, all.into()); + } + } } else { - Token::Invalid(all) + NodeKind::Error(ErrorPosition::Full, all.into()) } } - fn string(&mut self) -> Token<'s> { + + fn string(&mut self) -> NodeKind { let mut escaped = false; - Token::Str(StrToken { - string: self.s.eat_until(|c| { + NodeKind::Str(StrToken { + string: resolve_string(self.s.eat_until(|c| { if c == '"' && !escaped { true } else { escaped = c == '\\' && !escaped; false } - }), + })), terminated: self.s.eat_if('"'), }) } - fn line_comment(&mut self) -> Token<'s> { - Token::LineComment(self.s.eat_until(is_newline)) + fn line_comment(&mut self) -> NodeKind { + self.s.eat_until(is_newline); + NodeKind::LineComment } - fn block_comment(&mut self) -> Token<'s> { - let start = self.s.index(); - + fn block_comment(&mut self) -> NodeKind { let mut state = '_'; let mut depth = 1; @@ -466,10 +486,7 @@ impl<'s> Tokens<'s> { } } - let terminated = depth == 0; - let end = self.s.index() - if terminated { 2 } else { 0 }; - - Token::BlockComment(self.s.get(start .. 
end)) + NodeKind::BlockComment } fn maybe_in_url(&self) -> bool { @@ -477,24 +494,24 @@ impl<'s> Tokens<'s> { } } -fn keyword(ident: &str) -> Option> { +fn keyword(ident: &str) -> Option { Some(match ident { - "not" => Token::Not, - "and" => Token::And, - "or" => Token::Or, - "with" => Token::With, - "let" => Token::Let, - "if" => Token::If, - "else" => Token::Else, - "for" => Token::For, - "in" => Token::In, - "while" => Token::While, - "break" => Token::Break, - "continue" => Token::Continue, - "return" => Token::Return, - "import" => Token::Import, - "include" => Token::Include, - "from" => Token::From, + "not" => NodeKind::Not, + "and" => NodeKind::And, + "or" => NodeKind::Or, + "with" => NodeKind::With, + "let" => NodeKind::Let, + "if" => NodeKind::If, + "else" => NodeKind::Else, + "for" => NodeKind::For, + "in" => NodeKind::In, + "while" => NodeKind::While, + "break" => NodeKind::Break, + "continue" => NodeKind::Continue, + "return" => NodeKind::Return, + "import" => NodeKind::Import, + "include" => NodeKind::Include, + "from" => NodeKind::From, _ => return None, }) } @@ -506,24 +523,56 @@ mod tests { use super::*; + use NodeKind::*; use Option::None; - use Token::{Ident, *}; use TokenMode::{Code, Markup}; - const fn UnicodeEscape(sequence: &str, terminated: bool) -> Token { - Token::UnicodeEscape(UnicodeEscapeToken { sequence, terminated }) + fn UnicodeEscape(sequence: &str, terminated: bool) -> NodeKind { + NodeKind::UnicodeEscape(UnicodeEscapeToken { + character: resolve_hex(sequence), + sequence: sequence.into(), + terminated, + }) } - const fn Raw(text: &str, backticks: usize, terminated: bool) -> Token { - Token::Raw(RawToken { text, backticks, terminated }) + fn Raw( + text: &str, + lang: Option<&str>, + backticks: u8, + terminated: bool, + block: bool, + ) -> NodeKind { + NodeKind::Raw(RawToken { + text: text.into(), + lang: lang.map(Into::into), + backticks, + terminated, + block, + }) } - const fn Math(formula: &str, display: bool, terminated: bool) 
-> Token { - Token::Math(MathToken { formula, display, terminated }) + fn Math(formula: &str, display: bool, terminated: bool) -> NodeKind { + NodeKind::Math(MathToken { + formula: formula.into(), + display, + terminated, + }) } - const fn Str(string: &str, terminated: bool) -> Token { - Token::Str(StrToken { string, terminated }) + fn Str(string: &str, terminated: bool) -> NodeKind { + NodeKind::Str(StrToken { string: string.into(), terminated }) + } + + fn Text(string: &str) -> NodeKind { + NodeKind::Text(string.into()) + } + + fn Ident(ident: &str) -> NodeKind { + NodeKind::Ident(ident.into()) + } + + fn Invalid(invalid: &str) -> NodeKind { + NodeKind::Error(ErrorPosition::Full, invalid.into()) } /// Building blocks for suffix testing. @@ -541,40 +590,6 @@ mod tests { /// - '/': symbols const BLOCKS: &str = " a1/"; - /// Suffixes described by four-tuples of: - /// - /// - block the suffix is part of - /// - mode in which the suffix is applicable - /// - the suffix string - /// - the resulting suffix token - const SUFFIXES: &[(char, Option, &str, Token)] = &[ - // Whitespace suffixes. - (' ', None, " ", Space(0)), - (' ', None, "\n", Space(1)), - (' ', None, "\r", Space(1)), - (' ', None, "\r\n", Space(1)), - // Letter suffixes. - ('a', Some(Markup), "hello", Text("hello")), - ('a', Some(Markup), "💚", Text("💚")), - ('a', Some(Code), "val", Ident("val")), - ('a', Some(Code), "α", Ident("α")), - ('a', Some(Code), "_", Ident("_")), - // Number suffixes. - ('1', Some(Code), "2", Int(2)), - ('1', Some(Code), ".2", Float(0.2)), - // Symbol suffixes. - ('/', None, "[", LeftBracket), - ('/', None, "//", LineComment("")), - ('/', None, "/**/", BlockComment("")), - ('/', Some(Markup), "*", Star), - ('/', Some(Markup), "$ $", Math(" ", false, true)), - ('/', Some(Markup), r"\\", Text(r"\")), - ('/', Some(Markup), "#let", Let), - ('/', Some(Code), "(", LeftParen), - ('/', Some(Code), ":", Colon), - ('/', Some(Code), "+=", PlusEq), - ]; - macro_rules! 
t { (Both $($tts:tt)*) => { t!(Markup $($tts)*); @@ -584,22 +599,56 @@ mod tests { // Test without suffix. t!(@$mode: $src => $($token),*); + // Suffixes described by four-tuples of: + // + // - block the suffix is part of + // - mode in which the suffix is applicable + // - the suffix string + // - the resulting suffix NodeKind + let suffixes: &[(char, Option, &str, NodeKind)] = &[ + // Whitespace suffixes. + (' ', None, " ", Space(0)), + (' ', None, "\n", Space(1)), + (' ', None, "\r", Space(1)), + (' ', None, "\r\n", Space(1)), + // Letter suffixes. + ('a', Some(Markup), "hello", Text("hello")), + ('a', Some(Markup), "💚", Text("💚")), + ('a', Some(Code), "val", Ident("val")), + ('a', Some(Code), "α", Ident("α")), + ('a', Some(Code), "_", Ident("_")), + // Number suffixes. + ('1', Some(Code), "2", Int(2)), + ('1', Some(Code), ".2", Float(0.2)), + // Symbol suffixes. + ('/', None, "[", LeftBracket), + ('/', None, "//", LineComment), + ('/', None, "/**/", BlockComment), + ('/', Some(Markup), "*", Strong), + ('/', Some(Markup), "$ $", Math(" ", false, true)), + ('/', Some(Markup), r"\\", Text("\\")), + ('/', Some(Markup), "#let", Let), + ('/', Some(Code), "(", LeftParen), + ('/', Some(Code), ":", Colon), + ('/', Some(Code), "+=", PlusEq), + ]; + // Test with each applicable suffix. - for &(block, mode, suffix, token) in SUFFIXES { + for (block, mode, suffix, token) in suffixes { let src = $src; #[allow(unused_variables)] let blocks = BLOCKS; $(let blocks = $blocks;)? 
assert!(!blocks.contains(|c| !BLOCKS.contains(c))); - if (mode.is_none() || mode == Some($mode)) && blocks.contains(block) { + if (mode.is_none() || mode == &Some($mode)) && blocks.contains(*block) { t!(@$mode: format!("{}{}", src, suffix) => $($token,)* token); } } }}; (@$mode:ident: $src:expr => $($token:expr),*) => {{ let src = $src; - let found = Tokens::new(&src, $mode).collect::>(); - let expected = vec![$($token),*]; + let found = Tokens::new(&SourceFile::detached(src.clone()), $mode).collect::>(); + let expected = vec![$($token.clone()),*]; check(&src, found, expected); }}; } @@ -671,7 +720,7 @@ mod tests { // Test text ends. t!(Markup[""]: "hello " => Text("hello"), Space(0)); - t!(Markup[""]: "hello~" => Text("hello"), Tilde); + t!(Markup[""]: "hello~" => Text("hello"), NonBreakingSpace); } #[test] @@ -713,16 +762,16 @@ mod tests { #[test] fn test_tokenize_markup_symbols() { // Test markup tokens. - t!(Markup[" a1"]: "*" => Star); - t!(Markup: "_" => Underscore); + t!(Markup[" a1"]: "*" => Strong); + t!(Markup: "_" => Emph); t!(Markup[""]: "===" => Eq, Eq, Eq); t!(Markup["a1/"]: "= " => Eq, Space(0)); - t!(Markup: "~" => Tilde); - t!(Markup[" "]: r"\" => Backslash); - t!(Markup["a "]: r"a--" => Text("a"), HyphHyph); - t!(Markup["a1/"]: "- " => Hyph, Space(0)); - t!(Markup[" "]: "." => Numbering(None)); - t!(Markup[" "]: "1." => Numbering(Some(1))); + t!(Markup: "~" => NonBreakingSpace); + t!(Markup[" "]: r"\" => Linebreak); + t!(Markup["a "]: r"a--" => Text("a"), EnDash); + t!(Markup["a1/"]: "- " => ListBullet, Space(0)); + t!(Markup[" "]: "." => EnumNumbering(None)); + t!(Markup[" "]: "1." => EnumNumbering(Some(1))); t!(Markup[" "]: "1.a" => Text("1."), Text("a")); t!(Markup[" /"]: "a1." 
=> Text("a1.")); } @@ -734,7 +783,7 @@ mod tests { t!(Code: ";" => Semicolon); t!(Code: ":" => Colon); t!(Code: "+" => Plus); - t!(Code: "-" => Hyph); + t!(Code: "-" => Minus); t!(Code[" a1"]: "*" => Star); t!(Code[" a1"]: "/" => Slash); t!(Code: "=" => Eq); @@ -756,10 +805,10 @@ mod tests { t!(Code[" a/"]: "..." => Dots, Invalid(".")); // Test hyphen as symbol vs part of identifier. - t!(Code[" /"]: "-1" => Hyph, Int(1)); - t!(Code[" /"]: "-a" => Hyph, Ident("a")); - t!(Code[" /"]: "--1" => Hyph, Hyph, Int(1)); - t!(Code[" /"]: "--_a" => Hyph, Hyph, Ident("_a")); + t!(Code[" /"]: "-1" => Minus, Int(1)); + t!(Code[" /"]: "-a" => Minus, Ident("a")); + t!(Code[" /"]: "--1" => Minus, Minus, Int(1)); + t!(Code[" /"]: "--_a" => Minus, Minus, Ident("_a")); t!(Code[" /"]: "a-b" => Ident("a-b")); } @@ -776,13 +825,13 @@ mod tests { ("import", Import), ]; - for &(s, t) in &list { + for (s, t) in list.clone() { t!(Markup[" "]: format!("#{}", s) => t); t!(Markup[" "]: format!("#{0}#{0}", s) => t, t); - t!(Markup[" /"]: format!("# {}", s) => Token::Text("#"), Space(0), Text(s)); + t!(Markup[" /"]: format!("# {}", s) => Text("#"), Space(0), Text(s)); } - for &(s, t) in &list { + for (s, t) in list { t!(Code[" "]: s => t); t!(Markup[" /"]: s => Text(s)); } @@ -796,25 +845,23 @@ mod tests { #[test] fn test_tokenize_raw_blocks() { - let empty = Raw("", 1, true); - // Test basic raw block. - t!(Markup: "``" => empty); - t!(Markup: "`raw`" => Raw("raw", 1, true)); - t!(Markup[""]: "`]" => Raw("]", 1, false)); + t!(Markup: "``" => Raw("", None, 1, true, false)); + t!(Markup: "`raw`" => Raw("raw", None, 1, true, false)); + t!(Markup[""]: "`]" => Raw("]", None, 1, false, false)); // Test special symbols in raw block. 
- t!(Markup: "`[brackets]`" => Raw("[brackets]", 1, true)); - t!(Markup[""]: r"`\`` " => Raw(r"\", 1, true), Raw(" ", 1, false)); + t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, true, false)); + t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, true, false), Raw(" ", None, 1, false, false)); // Test separated closing backticks. - t!(Markup: "```not `y`e`t```" => Raw("not `y`e`t", 3, true)); + t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, true, false)); // Test more backticks. - t!(Markup: "``nope``" => empty, Text("nope"), empty); - t!(Markup: "````🚀````" => Raw("🚀", 4, true)); - t!(Markup[""]: "`````👩‍🚀````noend" => Raw("👩‍🚀````noend", 5, false)); - t!(Markup[""]: "````raw``````" => Raw("raw", 4, true), empty); + t!(Markup: "``nope``" => Raw("", None, 1, true, false), Text("nope"), Raw("", None, 1, true, false)); + t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, true, false)); + t!(Markup[""]: "`````👩‍🚀````noend" => Raw("````noend", Some("👩‍🚀"), 5, false, false)); + t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, true, false), Raw("", None, 1, true, false)); } #[test] @@ -896,8 +943,8 @@ mod tests { let nums = ints.iter().map(|&(k, v)| (k, v as f64)).chain(floats); let suffixes = [ - ("%", Percent as fn(f64) -> Token<'static>), - ("fr", Fraction as fn(f64) -> Token<'static>), + ("%", Percentage as fn(f64) -> NodeKind), + ("fr", Fraction as fn(f64) -> NodeKind), ("mm", |x| Length(x, LengthUnit::Mm)), ("pt", |x| Length(x, LengthUnit::Pt)), ("cm", |x| Length(x, LengthUnit::Cm)), @@ -930,54 +977,54 @@ mod tests { t!(Code[""]: "\"hi" => Str("hi", false)); // Test escaped quote. - t!(Code: r#""a\"bc""# => Str(r#"a\"bc"#, true)); - t!(Code[""]: r#""\""# => Str(r#"\""#, false)); + t!(Code: r#""a\"bc""# => Str("a\"bc", true)); + t!(Code[""]: r#""\""# => Str("\"", false)); } #[test] fn test_tokenize_line_comments() { // Test line comment with no trailing newline. 
- t!(Both[""]: "//" => LineComment("")); + t!(Both[""]: "//" => LineComment); // Test line comment ends at newline. - t!(Both["a1/"]: "//bc\n" => LineComment("bc"), Space(1)); - t!(Both["a1/"]: "// bc \n" => LineComment(" bc "), Space(1)); - t!(Both["a1/"]: "//bc\r\n" => LineComment("bc"), Space(1)); + t!(Both["a1/"]: "//bc\n" => LineComment, Space(1)); + t!(Both["a1/"]: "// bc \n" => LineComment, Space(1)); + t!(Both["a1/"]: "//bc\r\n" => LineComment, Space(1)); // Test nested line comments. - t!(Both["a1/"]: "//a//b\n" => LineComment("a//b"), Space(1)); + t!(Both["a1/"]: "//a//b\n" => LineComment, Space(1)); } #[test] fn test_tokenize_block_comments() { // Test basic block comments. - t!(Both[""]: "/*" => BlockComment("")); - t!(Both: "/**/" => BlockComment("")); - t!(Both: "/*🏞*/" => BlockComment("🏞")); - t!(Both: "/*\n*/" => BlockComment("\n")); + t!(Both[""]: "/*" => BlockComment); + t!(Both: "/**/" => BlockComment); + t!(Both: "/*🏞*/" => BlockComment); + t!(Both: "/*\n*/" => BlockComment); // Test depth 1 and 2 nested block comments. - t!(Both: "/* /* */ */" => BlockComment(" /* */ ")); - t!(Both: "/*/*/**/*/*/" => BlockComment("/*/**/*/")); + t!(Both: "/* /* */ */" => BlockComment); + t!(Both: "/*/*/**/*/*/" => BlockComment); // Test two nested, one unclosed block comments. - t!(Both[""]: "/*/*/**/*/" => BlockComment("/*/**/*/")); + t!(Both[""]: "/*/*/**/*/" => BlockComment); // Test all combinations of up to two following slashes and stars. 
- t!(Both[""]: "/*" => BlockComment("")); - t!(Both[""]: "/*/" => BlockComment("/")); - t!(Both[""]: "/**" => BlockComment("*")); - t!(Both[""]: "/*//" => BlockComment("//")); - t!(Both[""]: "/*/*" => BlockComment("/*")); - t!(Both[""]: "/**/" => BlockComment("")); - t!(Both[""]: "/***" => BlockComment("**")); + t!(Both[""]: "/*" => BlockComment); + t!(Both[""]: "/*/" => BlockComment); + t!(Both[""]: "/**" => BlockComment); + t!(Both[""]: "/*//" => BlockComment); + t!(Both[""]: "/*/*" => BlockComment); + t!(Both[""]: "/**/" => BlockComment); + t!(Both[""]: "/***" => BlockComment); } #[test] fn test_tokenize_invalid() { // Test invalidly closed block comments. - t!(Both: "*/" => Token::Invalid("*/")); - t!(Both: "/**/*/" => BlockComment(""), Token::Invalid("*/")); + t!(Both: "*/" => Invalid("*/")); + t!(Both: "/**/*/" => BlockComment, Invalid("*/")); // Test invalid expressions. t!(Code: r"\" => Invalid(r"\")); @@ -990,6 +1037,6 @@ mod tests { // Test invalid number suffixes. t!(Code[" /"]: "1foo" => Invalid("1foo")); t!(Code: "1p%" => Invalid("1p"), Invalid("%")); - t!(Code: "1%%" => Percent(1.0), Invalid("%")); + t!(Code: "1%%" => Percentage(1.0), Invalid("%")); } } diff --git a/src/source.rs b/src/source.rs index c9164f90b..e33e146c0 100644 --- a/src/source.rs +++ b/src/source.rs @@ -8,8 +8,10 @@ use std::rc::Rc; use serde::{Deserialize, Serialize}; +use crate::diag::{Error, TypResult}; use crate::loading::{FileHash, Loader}; -use crate::parse::{is_newline, Scanner}; +use crate::parse::{is_newline, parse, Scanner}; +use crate::syntax::{GreenNode, Markup, NodeKind, RedNode}; use crate::util::PathExt; #[cfg(feature = "codespan-reporting")] @@ -124,6 +126,7 @@ pub struct SourceFile { path: PathBuf, src: String, line_starts: Vec, + root: Rc, } impl SourceFile { @@ -131,11 +134,28 @@ impl SourceFile { pub fn new(id: SourceId, path: &Path, src: String) -> Self { let mut line_starts = vec![0]; line_starts.extend(newlines(&src)); - Self { + let mut init = Self { id, 
path: path.normalize(), src, line_starts, + root: Rc::new(GreenNode::new(NodeKind::Markup, 0)), + }; + + let root = parse(&init); + init.root = root; + init + } + + pub fn ast(&self) -> TypResult { + let res = RedNode::new_root(self.root.clone(), self.id); + let errors = res.errors(); + if errors.is_empty() { + Ok(res.ticket().cast().unwrap()) + } else { + Err(Box::new( + errors.into_iter().map(|(span, msg)| Error::new(span, msg)).collect(), + )) } } diff --git a/src/syntax/expr.rs b/src/syntax/expr.rs index 904515bab..d0d0c62fe 100644 --- a/src/syntax/expr.rs +++ b/src/syntax/expr.rs @@ -1,75 +1,50 @@ -use std::rc::Rc; - -use super::{Ident, Markup, Span, Token}; +use super::{Ident, Markup, NodeKind, RedNode, RedTicket, Span, TypedNode}; use crate::geom::{AngularUnit, LengthUnit}; +use crate::node; use crate::util::EcoString; /// An expression. #[derive(Debug, Clone, PartialEq)] pub enum Expr { /// An identifier: `left`. - Ident(Box), + Ident(Ident), /// A literal: `1`, `true`, ... - Lit(Box), + Lit(Lit), /// An array expression: `(1, "hi", 12cm)`. - Array(Box), + Array(ArrayExpr), /// A dictionary expression: `(thickness: 3pt, pattern: dashed)`. - Dict(Box), + Dict(DictExpr), /// A template expression: `[*Hi* there!]`. - Template(Box), + Template(TemplateExpr), /// A grouped expression: `(1 + 2)`. - Group(Box), + Group(GroupExpr), /// A block expression: `{ let x = 1; x + 2 }`. - Block(Box), + Block(BlockExpr), /// A unary operation: `-x`. - Unary(Box), + Unary(UnaryExpr), /// A binary operation: `a + b`. - Binary(Box), + Binary(BinaryExpr), /// An invocation of a function: `f(x, y)`. - Call(Box), + Call(CallExpr), /// A closure expression: `(x, y) => z`. - Closure(Box), + Closure(ClosureExpr), /// A with expression: `f with (x, y: 1)`. - With(Box), + With(WithExpr), /// A let expression: `let x = 1`. - Let(Box), + Let(LetExpr), /// An if-else expression: `if x { y } else { z }`. - If(Box), + If(IfExpr), /// A while loop expression: `while x { y }`. 
- While(Box), + While(WhileExpr), /// A for loop expression: `for x in y { z }`. - For(Box), + For(ForExpr), /// An import expression: `import a, b, c from "utils.typ"`. - Import(Box), + Import(ImportExpr), /// An include expression: `include "chapter1.typ"`. - Include(Box), + Include(IncludeExpr), } impl Expr { - /// The source code location. - pub fn span(&self) -> Span { - match self { - Self::Ident(v) => v.span, - Self::Lit(v) => v.span(), - Self::Array(v) => v.span, - Self::Dict(v) => v.span, - Self::Template(v) => v.span, - Self::Group(v) => v.span, - Self::Block(v) => v.span, - Self::Unary(v) => v.span, - Self::Binary(v) => v.span, - Self::Call(v) => v.span, - Self::Closure(v) => v.span, - Self::With(v) => v.span, - Self::Let(v) => v.span, - Self::If(v) => v.span, - Self::While(v) => v.span, - Self::For(v) => v.span, - Self::Import(v) => v.span, - Self::Include(v) => v.span, - } - } - /// Whether the expression can be shortened in markup with a hashtag. pub fn has_short_form(&self) -> bool { matches!(self, @@ -83,6 +58,63 @@ impl Expr { | Self::Include(_) ) } + + /// Return the expression's span. 
+ pub fn span(&self) -> Span { + match self { + Self::Ident(ident) => ident.span, + Self::Lit(lit) => lit.span(), + Self::Array(array) => array.span(), + Self::Dict(dict) => dict.span(), + Self::Template(template) => template.span(), + Self::Group(group) => group.span(), + Self::Block(block) => block.span(), + Self::Unary(unary) => unary.span(), + Self::Binary(binary) => binary.span(), + Self::Call(call) => call.span(), + Self::Closure(closure) => closure.span(), + Self::With(with) => with.span(), + Self::Let(let_) => let_.span(), + Self::If(if_) => if_.span(), + Self::While(while_) => while_.span(), + Self::For(for_) => for_.span(), + Self::Import(import) => import.span(), + Self::Include(include) => include.span(), + } + } +} + +impl TypedNode for Expr { + fn cast_from(node: RedTicket) -> Option { + match node.kind() { + NodeKind::Ident(_) => Some(Self::Ident(Ident::cast_from(node).unwrap())), + NodeKind::Array => Some(Self::Array(ArrayExpr::cast_from(node).unwrap())), + NodeKind::Dict => Some(Self::Dict(DictExpr::cast_from(node).unwrap())), + NodeKind::Template => { + Some(Self::Template(TemplateExpr::cast_from(node).unwrap())) + } + NodeKind::Group => Some(Self::Group(GroupExpr::cast_from(node).unwrap())), + NodeKind::Block => Some(Self::Block(BlockExpr::cast_from(node).unwrap())), + NodeKind::Unary => Some(Self::Unary(UnaryExpr::cast_from(node).unwrap())), + NodeKind::Binary => Some(Self::Binary(BinaryExpr::cast_from(node).unwrap())), + NodeKind::Call => Some(Self::Call(CallExpr::cast_from(node).unwrap())), + NodeKind::Closure => { + Some(Self::Closure(ClosureExpr::cast_from(node).unwrap())) + } + NodeKind::WithExpr => Some(Self::With(WithExpr::cast_from(node).unwrap())), + NodeKind::LetExpr => Some(Self::Let(LetExpr::cast_from(node).unwrap())), + NodeKind::IfExpr => Some(Self::If(IfExpr::cast_from(node).unwrap())), + NodeKind::WhileExpr => Some(Self::While(WhileExpr::cast_from(node).unwrap())), + NodeKind::ForExpr => 
Some(Self::For(ForExpr::cast_from(node).unwrap())), + NodeKind::ImportExpr => { + Some(Self::Import(ImportExpr::cast_from(node).unwrap())) + } + NodeKind::IncludeExpr => { + Some(Self::Include(IncludeExpr::cast_from(node).unwrap())) + } + _ => Some(Self::Lit(Lit::cast_from(node)?)), + } + } } /// A literal: `1`, `true`, ... @@ -113,94 +145,145 @@ pub enum Lit { Str(Span, EcoString), } -impl Lit { - /// The source code location. - pub fn span(&self) -> Span { - match *self { - Self::None(span) => span, - Self::Auto(span) => span, - Self::Bool(span, _) => span, - Self::Int(span, _) => span, - Self::Float(span, _) => span, - Self::Length(span, _, _) => span, - Self::Angle(span, _, _) => span, - Self::Percent(span, _) => span, - Self::Fractional(span, _) => span, - Self::Str(span, _) => span, +impl TypedNode for Lit { + fn cast_from(node: RedTicket) -> Option { + match node.kind() { + NodeKind::None => Some(Self::None(node.own().span())), + NodeKind::Auto => Some(Self::Auto(node.own().span())), + NodeKind::Bool(b) => Some(Self::Bool(node.own().span(), *b)), + NodeKind::Int(i) => Some(Self::Int(node.own().span(), *i)), + NodeKind::Float(f) => Some(Self::Float(node.own().span(), *f)), + NodeKind::Length(f, unit) => Some(Self::Length(node.own().span(), *f, *unit)), + NodeKind::Angle(f, unit) => Some(Self::Angle(node.own().span(), *f, *unit)), + NodeKind::Percentage(f) => Some(Self::Percent(node.own().span(), *f)), + NodeKind::Fraction(f) => Some(Self::Fractional(node.own().span(), *f)), + NodeKind::Str(s) => Some(Self::Str(node.own().span(), s.string.clone())), + _ => None, } } } -/// An array expression: `(1, "hi", 12cm)`. -#[derive(Debug, Clone, PartialEq)] -pub struct ArrayExpr { - /// The source code location. - pub span: Span, - /// The entries of the array. - pub items: Vec, -} - -/// A dictionary expression: `(thickness: 3pt, pattern: dashed)`. -#[derive(Debug, Clone, PartialEq)] -pub struct DictExpr { - /// The source code location. 
- pub span: Span, - /// The named dictionary entries. - pub items: Vec, -} - -/// A pair of a name and an expression: `pattern: dashed`. -#[derive(Debug, Clone, PartialEq)] -pub struct Named { - /// The name: `pattern`. - pub name: Ident, - /// The right-hand side of the pair: `dashed`. - pub expr: Expr, -} - -impl Named { - /// The source code location. +impl Lit { pub fn span(&self) -> Span { - self.name.span.join(self.expr.span()) + match self { + Self::None(span) => *span, + Self::Auto(span) => *span, + Self::Bool(span, _) => *span, + Self::Int(span, _) => *span, + Self::Float(span, _) => *span, + Self::Length(span, _, _) => *span, + Self::Angle(span, _, _) => *span, + Self::Percent(span, _) => *span, + Self::Fractional(span, _) => *span, + Self::Str(span, _) => *span, + } } } -/// A template expression: `[*Hi* there!]`. -#[derive(Debug, Clone, PartialEq)] -pub struct TemplateExpr { - /// The source code location. - pub span: Span, +node!( + /// An array expression: `(1, "hi", 12cm)`. + Array => ArrayExpr +); + +impl ArrayExpr { + /// The array items. + pub fn items(&self) -> Vec { + self.0.children().filter_map(RedTicket::cast).collect() + } +} + +node!( + /// A dictionary expression: `(thickness: 3pt, pattern: dashed)`. + Dict => DictExpr +); + +impl DictExpr { + /// The named dictionary items. + pub fn items(&self) -> Vec { + self.0.children().filter_map(RedTicket::cast).collect() + } +} + +node!( + /// A pair of a name and an expression: `pattern: dashed`. + Named +); + +impl Named { + /// The name: `pattern`. + pub fn name(&self) -> Ident { + self.0.cast_first_child().expect("named pair is missing name ident") + } + + /// The right-hand side of the pair: `dashed`. + pub fn expr(&self) -> Expr { + self.0 + .children() + .filter_map(RedTicket::cast) + .nth(1) + .expect("named pair is missing expression") + } +} + +node!( + /// A template expression: `[*Hi* there!]`. + Template => TemplateExpr +); + +impl TemplateExpr { /// The contents of the template. 
- pub body: Markup, + pub fn body(&self) -> Markup { + self.0 + .cast_first_child() + .expect("template expression is missing body") + } } -/// A grouped expression: `(1 + 2)`. -#[derive(Debug, Clone, PartialEq)] -pub struct GroupExpr { - /// The source code location. - pub span: Span, +node!( + /// A grouped expression: `(1 + 2)`. + Group => GroupExpr +); + +impl GroupExpr { /// The wrapped expression. - pub expr: Expr, + pub fn expr(&self) -> Expr { + self.0 + .cast_first_child() + .expect("group expression is missing expression") + } } -/// A block expression: `{ let x = 1; x + 2 }`. -#[derive(Debug, Clone, PartialEq)] -pub struct BlockExpr { - /// The source code location. - pub span: Span, +node!( + /// A block expression: `{ let x = 1; x + 2 }`. + Block => BlockExpr +); + +impl BlockExpr { /// The list of expressions contained in the block. - pub exprs: Vec, + pub fn exprs(&self) -> Vec { + self.0.children().filter_map(RedTicket::cast).collect() + } } -/// A unary operation: `-x`. -#[derive(Debug, Clone, PartialEq)] -pub struct UnaryExpr { - /// The source code location. - pub span: Span, +node!( + /// A unary operation: `-x`. + Unary => UnaryExpr +); + +impl UnaryExpr { /// The operator: `-`. - pub op: UnOp, + pub fn op(&self) -> UnOp { + self.0 + .cast_first_child() + .expect("unary expression is missing operator") + } + /// The expression to operator on: `x`. - pub expr: Expr, + pub fn expr(&self) -> Expr { + self.0 + .cast_first_child() + .expect("unary expression is missing expression") + } } /// A unary operator. @@ -214,13 +297,19 @@ pub enum UnOp { Not, } +impl TypedNode for UnOp { + fn cast_from(node: RedTicket) -> Option { + Self::from_token(node.kind()) + } +} + impl UnOp { /// Try to convert the token into a unary operation. 
- pub fn from_token(token: Token) -> Option { + pub fn from_token(token: &NodeKind) -> Option { Some(match token { - Token::Plus => Self::Pos, - Token::Hyph => Self::Neg, - Token::Not => Self::Not, + NodeKind::Plus => Self::Pos, + NodeKind::Minus => Self::Neg, + NodeKind::Not => Self::Not, _ => return None, }) } @@ -229,7 +318,7 @@ impl UnOp { pub fn precedence(self) -> usize { match self { Self::Pos | Self::Neg => 8, - Self::Not => 3, + Self::Not => 4, } } @@ -243,17 +332,34 @@ impl UnOp { } } -/// A binary operation: `a + b`. -#[derive(Debug, Clone, PartialEq)] -pub struct BinaryExpr { - /// The source code location. - pub span: Span, +node!( + /// A binary operation: `a + b`. + Binary => BinaryExpr +); + +impl BinaryExpr { + /// The binary operator: `+`. + pub fn op(&self) -> BinOp { + self.0 + .cast_first_child() + .expect("binary expression is missing operator") + } + /// The left-hand side of the operation: `a`. - pub lhs: Expr, - /// The operator: `+`. - pub op: BinOp, + pub fn lhs(&self) -> Expr { + self.0 + .cast_first_child() + .expect("binary expression is missing left-hand side") + } + /// The right-hand side of the operation: `b`. - pub rhs: Expr, + pub fn rhs(&self) -> Expr { + self.0 + .children() + .filter_map(RedTicket::cast) + .nth(1) + .expect("binary expression is missing right-hand side") + } } /// A binary operator. @@ -295,27 +401,33 @@ pub enum BinOp { DivAssign, } +impl TypedNode for BinOp { + fn cast_from(node: RedTicket) -> Option { + Self::from_token(node.kind()) + } +} + impl BinOp { /// Try to convert the token into a binary operation. 
- pub fn from_token(token: Token) -> Option { + pub fn from_token(token: &NodeKind) -> Option { Some(match token { - Token::Plus => Self::Add, - Token::Hyph => Self::Sub, - Token::Star => Self::Mul, - Token::Slash => Self::Div, - Token::And => Self::And, - Token::Or => Self::Or, - Token::EqEq => Self::Eq, - Token::ExclEq => Self::Neq, - Token::Lt => Self::Lt, - Token::LtEq => Self::Leq, - Token::Gt => Self::Gt, - Token::GtEq => Self::Geq, - Token::Eq => Self::Assign, - Token::PlusEq => Self::AddAssign, - Token::HyphEq => Self::SubAssign, - Token::StarEq => Self::MulAssign, - Token::SlashEq => Self::DivAssign, + NodeKind::Plus => Self::Add, + NodeKind::Minus => Self::Sub, + NodeKind::Star => Self::Mul, + NodeKind::Slash => Self::Div, + NodeKind::And => Self::And, + NodeKind::Or => Self::Or, + NodeKind::EqEq => Self::Eq, + NodeKind::ExclEq => Self::Neq, + NodeKind::Lt => Self::Lt, + NodeKind::LtEq => Self::Leq, + NodeKind::Gt => Self::Gt, + NodeKind::GtEq => Self::Geq, + NodeKind::Eq => Self::Assign, + NodeKind::PlusEq => Self::AddAssign, + NodeKind::HyphEq => Self::SubAssign, + NodeKind::StarEq => Self::MulAssign, + NodeKind::SlashEq => Self::DivAssign, _ => return None, }) } @@ -392,27 +504,35 @@ pub enum Associativity { Right, } -/// An invocation of a function: `foo(...)`. -#[derive(Debug, Clone, PartialEq)] -pub struct CallExpr { - /// The source code location. - pub span: Span, +node!( + /// An invocation of a function: `foo(...)`. + Call => CallExpr +); + +impl CallExpr { /// The function to call. - pub callee: Expr, + pub fn callee(&self) -> Expr { + self.0.cast_first_child().expect("call expression is missing callee") + } + /// The arguments to the function. - pub args: CallArgs, + pub fn args(&self) -> CallArgs { + self.0 + .cast_first_child() + .expect("call expression is missing argument list") + } } -/// The arguments to a function: `12, draw: false`. 
-/// -/// In case of a bracketed invocation with a body, the body is _not_ -/// included in the span for the sake of clearer error messages. -#[derive(Debug, Clone, PartialEq)] -pub struct CallArgs { - /// The source code location. - pub span: Span, +node!( + /// The arguments to a function: `12, draw: false`. + CallArgs +); + +impl CallArgs { /// The positional and named arguments. - pub items: Vec, + pub fn items(&self) -> Vec { + self.0.children().filter_map(RedTicket::cast).collect() + } } /// An argument to a function call. @@ -426,30 +546,75 @@ pub enum CallArg { Spread(Expr), } +impl TypedNode for CallArg { + fn cast_from(node: RedTicket) -> Option { + match node.kind() { + NodeKind::Named => Some(CallArg::Named( + node.cast().expect("named call argument is missing name"), + )), + NodeKind::ParameterSink => Some(CallArg::Spread( + node.own() + .cast_first_child() + .expect("call argument sink is missing expression"), + )), + _ => Some(CallArg::Pos(node.cast()?)), + } + } +} + impl CallArg { - /// The source code location. + /// The name of this argument. pub fn span(&self) -> Span { match self { - Self::Pos(expr) => expr.span(), Self::Named(named) => named.span(), + Self::Pos(expr) => expr.span(), Self::Spread(expr) => expr.span(), } } } -/// A closure expression: `(x, y) => z`. -#[derive(Debug, Clone, PartialEq)] -pub struct ClosureExpr { - /// The source code location. - pub span: Span, +node!( + /// A closure expression: `(x, y) => z`. + Closure => ClosureExpr +); + +impl ClosureExpr { /// The name of the closure. /// /// This only exists if you use the function syntax sugar: `let f(x) = y`. - pub name: Option, + pub fn name(&self) -> Option { + // `first_convert_child` does not work here because of the Option in the + // Result. + self.0.cast_first_child() + } + /// The parameter bindings. 
- pub params: Vec, + pub fn params(&self) -> Vec { + self.0 + .children() + .find(|x| x.kind() == &NodeKind::ClosureParams) + .expect("closure is missing parameter list") + .own() + .children() + .filter_map(RedTicket::cast) + .collect() + } + /// The body of the closure. - pub body: Rc, + pub fn body(&self) -> Expr { + // The filtering for the NodeKind is necessary here because otherwise, + // `first_convert_child` will use the Ident if present. + self.0.cast_last_child().expect("closure is missing body") + } + + /// The ticket of the body of the closure. + pub fn body_ticket(&self) -> RedTicket { + self.0 + .children() + .filter(|x| x.cast::().is_some()) + .last() + .unwrap() + } } /// An parameter to a closure. @@ -463,50 +628,111 @@ pub enum ClosureParam { Sink(Ident), } -impl ClosureParam { - /// The source code location. - pub fn span(&self) -> Span { - match self { - Self::Pos(ident) => ident.span, - Self::Named(named) => named.span(), - Self::Sink(ident) => ident.span, +impl TypedNode for ClosureParam { + fn cast_from(node: RedTicket) -> Option { + match node.kind() { + NodeKind::Ident(i) => { + Some(ClosureParam::Pos(Ident::new(i, node.own().span()).unwrap())) + } + NodeKind::Named => Some(ClosureParam::Named( + node.cast().expect("named closure parameter is missing name"), + )), + NodeKind::ParameterSink => Some(ClosureParam::Sink( + node.own() + .cast_first_child() + .expect("closure parameter sink is missing identifier"), + )), + _ => Some(ClosureParam::Pos(node.cast()?)), } } } -/// A with expression: `f with (x, y: 1)`. -/// -/// Applies arguments to a function. -#[derive(Debug, Clone, PartialEq)] -pub struct WithExpr { - /// The source code location. - pub span: Span, +node!( + /// A with expression: `f with (x, y: 1)`. + WithExpr +); + +impl WithExpr { /// The function to apply the arguments to. 
- pub callee: Expr, + pub fn callee(&self) -> Expr { + self.0 + .cast_first_child() + .expect("with expression is missing callee expression") + } + /// The arguments to apply to the function. - pub args: CallArgs, + pub fn args(&self) -> CallArgs { + self.0 + .cast_first_child() + .expect("with expression is missing argument list") + } } -/// A let expression: `let x = 1`. -#[derive(Debug, Clone, PartialEq)] -pub struct LetExpr { - /// The source code location. - pub span: Span, +node!( + /// A let expression: `let x = 1`. + LetExpr +); + +impl LetExpr { /// The binding to assign to. - pub binding: Ident, + pub fn binding(&self) -> Ident { + if let Some(c) = self.0.cast_first_child() { + c + } else if let Some(w) = self.0.typed_child(&NodeKind::WithExpr) { + // Can't do an `first_convert_child` here because the WithExpr's + // callee has to be an identifier. + w.cast_first_child() + .expect("with expression is missing an identifier callee") + } else if let Some(Expr::Closure(c)) = self.0.cast_last_child() { + c.name().expect("closure is missing an identifier name") + } else { + panic!("let expression is missing either an identifier or a with expression") + } + } + /// The expression the binding is initialized with. - pub init: Option, + pub fn init(&self) -> Option { + if self.0.cast_first_child::().is_some() { + self.0.children().filter_map(RedTicket::cast).nth(1) + } else { + Some( + self.0 + .cast_first_child() + .expect("let expression is missing a with expression"), + ) + } + } + + /// The ticket for the expression the binding is initialized with. + pub fn init_ticket(&self) -> RedTicket { + if self.0.cast_first_child::().is_some() { + self.0.children().filter(|x| x.cast::().is_some()).nth(1) + } else { + self.0.children().find(|x| x.cast::().is_some()) + } + .unwrap() + } } -/// An import expression: `import a, b, c from "utils.typ"`. -#[derive(Debug, Clone, PartialEq)] -pub struct ImportExpr { - /// The source code location. 
- pub span: Span, +node!( + /// An import expression: `import a, b, c from "utils.typ"`. + ImportExpr +); + +impl ImportExpr { /// The items to be imported. - pub imports: Imports, + pub fn imports(&self) -> Imports { + self.0 + .cast_first_child() + .expect("import expression is missing import list") + } + /// The location of the importable file. - pub path: Expr, + pub fn path(&self) -> Expr { + self.0 + .cast_first_child() + .expect("import expression is missing path expression") + } } /// The items that ought to be imported from a file. @@ -518,67 +744,137 @@ pub enum Imports { Idents(Vec), } -/// An include expression: `include "chapter1.typ"`. -#[derive(Debug, Clone, PartialEq)] -pub struct IncludeExpr { - /// The source code location. - pub span: Span, - /// The location of the file to be included. - pub path: Expr, -} - -/// An if-else expression: `if x { y } else { z }`. -#[derive(Debug, Clone, PartialEq)] -pub struct IfExpr { - /// The source code location. - pub span: Span, - /// The condition which selects the body to evaluate. - pub condition: Expr, - /// The expression to evaluate if the condition is true. - pub if_body: Expr, - /// The expression to evaluate if the condition is false. - pub else_body: Option, -} - -/// A while loop expression: `while x { y }`. -#[derive(Debug, Clone, PartialEq)] -pub struct WhileExpr { - /// The source code location. - pub span: Span, - /// The condition which selects whether to evaluate the body. - pub condition: Expr, - /// The expression to evaluate while the condition is true. - pub body: Expr, -} - -/// A for loop expression: `for x in y { z }`. -#[derive(Debug, Clone, PartialEq)] -pub struct ForExpr { - /// The source code location. - pub span: Span, - /// The pattern to assign to. - pub pattern: ForPattern, - /// The expression to iterate over. - pub iter: Expr, - /// The expression to evaluate for each iteration. - pub body: Expr, -} - -/// A pattern in a for loop. 
-#[derive(Debug, Clone, PartialEq)] -pub enum ForPattern { - /// A value pattern: `for v in array`. - Value(Ident), - /// A key-value pattern: `for k, v in dict`. - KeyValue(Ident, Ident), -} - -impl ForPattern { - /// The source code location. - pub fn span(&self) -> Span { - match self { - Self::Value(v) => v.span, - Self::KeyValue(k, v) => k.span.join(v.span), +impl TypedNode for Imports { + fn cast_from(node: RedTicket) -> Option { + match node.kind() { + NodeKind::Star => Some(Imports::Wildcard), + NodeKind::ImportItems => { + let idents = node.own().children().filter_map(RedTicket::cast).collect(); + Some(Imports::Idents(idents)) + } + _ => None, } } } + +node!( + /// An include expression: `include "chapter1.typ"`. + IncludeExpr +); + +impl IncludeExpr { + /// The location of the file to be included. + pub fn path(&self) -> Expr { + self.0 + .cast_first_child() + .expect("include expression is missing path expression") + } +} + +node!( + /// An if-else expression: `if x { y } else { z }`. + IfExpr +); + +impl IfExpr { + /// The condition which selects the body to evaluate. + pub fn condition(&self) -> Expr { + self.0 + .cast_first_child() + .expect("if expression is missing condition expression") + } + + /// The expression to evaluate if the condition is true. + pub fn if_body(&self) -> Expr { + self.0 + .children() + .filter_map(RedTicket::cast) + .nth(1) + .expect("if expression is missing if body") + } + + /// The expression to evaluate if the condition is false. + pub fn else_body(&self) -> Option { + self.0.children().filter_map(RedTicket::cast).nth(2) + } +} + +node!( + /// A while loop expression: `while x { y }`. + WhileExpr +); + +impl WhileExpr { + /// The condition which selects whether to evaluate the body. + pub fn condition(&self) -> Expr { + self.0 + .cast_first_child() + .expect("while loop expression is missing condition expression") + } + + /// The expression to evaluate while the condition is true. 
+ pub fn body(&self) -> Expr { + self.0 + .children() + .filter_map(RedTicket::cast) + .nth(1) + .expect("while loop expression is missing body") + } +} + +node!( + /// A for loop expression: `for x in y { z }`. + ForExpr +); + +impl ForExpr { + /// The pattern to assign to. + pub fn pattern(&self) -> ForPattern { + self.0 + .cast_first_child() + .expect("for loop expression is missing pattern") + } + + /// The expression to iterate over. + pub fn iter(&self) -> Expr { + self.0 + .cast_first_child() + .expect("for loop expression is missing iterable expression") + } + + /// The expression to evaluate for each iteration. + pub fn body(&self) -> Expr { + self.0 + .children() + .filter_map(RedTicket::cast) + .last() + .expect("for loop expression is missing body") + } + + /// The ticket for the expression to evaluate for each iteration. + pub fn body_ticket(&self) -> RedTicket { + self.0 + .children() + .filter(|x| x.cast::().is_some()) + .last() + .unwrap() + } +} + +node!( + /// A for-in loop expression: `for x in y { z }`. + ForPattern +); + +impl ForPattern { + pub fn key(&self) -> Option { + let mut items: Vec<_> = self.0.children().filter_map(RedTicket::cast).collect(); + if items.len() > 1 { Some(items.remove(0)) } else { None } + } + + pub fn value(&self) -> Ident { + self.0 + .cast_last_child() + .expect("for-in loop pattern is missing value") + } +} diff --git a/src/syntax/ident.rs b/src/syntax/ident.rs index 398e2ff98..2c61329d1 100644 --- a/src/syntax/ident.rs +++ b/src/syntax/ident.rs @@ -3,7 +3,7 @@ use std::ops::Deref; use unicode_xid::UnicodeXID; -use super::Span; +use super::{NodeKind, RedTicket, Span, TypedNode}; use crate::util::EcoString; /// An unicode identifier with a few extra permissible characters. 
@@ -66,6 +66,16 @@ impl From<&Ident> for EcoString { } } +impl TypedNode for Ident { + fn cast_from(node: RedTicket) -> Option { + if let NodeKind::Ident(i) = node.kind() { + Some(Ident::new(i, node.own().span()).unwrap()) + } else { + None + } + } +} + /// Whether a string is a valid identifier. pub fn is_ident(string: &str) -> bool { let mut chars = string.chars(); diff --git a/src/syntax/markup.rs b/src/syntax/markup.rs index 09a371161..c12c0e819 100644 --- a/src/syntax/markup.rs +++ b/src/syntax/markup.rs @@ -1,41 +1,87 @@ -use super::{Expr, Ident, Span}; +use super::{Expr, Ident, NodeKind, RedNode, RedTicket, Span, TypedNode}; +use crate::node; use crate::util::EcoString; +use std::fmt::Write; /// The syntactical root capable of representing a full parsed document. pub type Markup = Vec; +impl TypedNode for Markup { + fn cast_from(node: RedTicket) -> Option { + if node.kind() != &NodeKind::Markup { + return None; + } + + let children = node.own().children().filter_map(TypedNode::cast_from).collect(); + Some(children) + } +} + /// A single piece of markup. #[derive(Debug, Clone, PartialEq)] pub enum MarkupNode { /// Whitespace containing less than two newlines. Space, /// A forced line break: `\`. - Linebreak(Span), + Linebreak, /// A paragraph break: Two or more newlines. - Parbreak(Span), + Parbreak, /// Strong text was enabled / disabled: `*`. - Strong(Span), + Strong, /// Emphasized text was enabled / disabled: `_`. - Emph(Span), + Emph, /// Plain text. Text(EcoString), /// A raw block with optional syntax highlighting: `` `...` ``. - Raw(Box), + Raw(RawNode), /// A section heading: `= Introduction`. - Heading(Box), + Heading(HeadingNode), /// An item in an unordered list: `- ...`. - List(Box), + List(ListNode), /// An item in an enumeration (ordered list): `1. ...`. - Enum(Box), + Enum(EnumNode), /// An expression. 
Expr(Expr), } +impl TypedNode for MarkupNode { + fn cast_from(node: RedTicket) -> Option { + match node.kind() { + NodeKind::Space(_) => Some(MarkupNode::Space), + NodeKind::Linebreak => Some(MarkupNode::Linebreak), + NodeKind::Parbreak => Some(MarkupNode::Parbreak), + NodeKind::Strong => Some(MarkupNode::Strong), + NodeKind::Emph => Some(MarkupNode::Emph), + NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())), + NodeKind::UnicodeEscape(u) => { + Some(MarkupNode::Text(if let Some(s) = u.character { + s.into() + } else { + let mut eco = EcoString::with_capacity(u.sequence.len() + 4); + write!(&mut eco, "\\u{{{}}}", u.sequence).unwrap(); + eco + })) + } + NodeKind::EnDash => Some(MarkupNode::Text(EcoString::from("\u{2013}"))), + NodeKind::EmDash => Some(MarkupNode::Text(EcoString::from("\u{2014}"))), + NodeKind::NonBreakingSpace => { + Some(MarkupNode::Text(EcoString::from("\u{00A0}"))) + } + NodeKind::Raw(_) => Some(MarkupNode::Raw(RawNode::cast_from(node).unwrap())), + NodeKind::Heading => { + Some(MarkupNode::Heading(HeadingNode::cast_from(node).unwrap())) + } + NodeKind::List => Some(MarkupNode::List(ListNode::cast_from(node).unwrap())), + NodeKind::Enum => Some(MarkupNode::Enum(EnumNode::cast_from(node).unwrap())), + NodeKind::Error(_, _) => None, + _ => Some(MarkupNode::Expr(Expr::cast_from(node)?)), + } + } +} + /// A raw block with optional syntax highlighting: `` `...` ``. #[derive(Debug, Clone, PartialEq)] pub struct RawNode { - /// The source code location. - pub span: Span, /// An optional identifier specifying the language to syntax-highlight in. pub lang: Option, /// The raw text, determined as the raw string between the backticks trimmed @@ -46,33 +92,97 @@ pub struct RawNode { pub block: bool, } -/// A section heading: `= Introduction`. -#[derive(Debug, Clone, PartialEq)] -pub struct HeadingNode { - /// The source code location. - pub span: Span, - /// The section depth (numer of equals signs). 
- pub level: usize, +impl TypedNode for RawNode { + fn cast_from(node: RedTicket) -> Option { + if let NodeKind::Raw(raw) = node.kind() { + let span = node.own().span(); + let start = span.start + raw.backticks as usize; + Some(Self { + block: raw.block, + lang: raw.lang.as_ref().and_then(|x| { + let span = Span::new(span.source, start, start + x.len()); + Ident::new(x, span) + }), + text: raw.text.clone(), + }) + } else { + None + } + } +} + +node!( + /// A section heading: `= Introduction`. + Heading => HeadingNode +); + +impl HeadingNode { /// The contents of the heading. - pub body: Markup, + pub fn body(&self) -> Markup { + self.0 + .cast_first_child() + .expect("heading node is missing markup body") + } + + /// The section depth (numer of equals signs). + pub fn level(&self) -> HeadingLevel { + self.0 + .cast_first_child() + .expect("heading node is missing heading level") + } } -/// An item in an unordered list: `- ...`. -#[derive(Debug, Clone, PartialEq)] -pub struct ListNode { - /// The source code location. - pub span: Span, +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct HeadingLevel(pub usize); + +impl TypedNode for HeadingLevel { + fn cast_from(node: RedTicket) -> Option { + if let NodeKind::HeadingLevel(l) = node.kind() { + Some(Self((*l).into())) + } else { + None + } + } +} + +node!( + /// An item in an unordered list: `- ...`. + List => ListNode +); + +impl ListNode { /// The contents of the list item. - pub body: Markup, + pub fn body(&self) -> Markup { + self.0.cast_first_child().expect("list node is missing body") + } } -/// An item in an enumeration (ordered list): `1. ...`. -#[derive(Debug, Clone, PartialEq)] -pub struct EnumNode { - /// The source code location. - pub span: Span, +node!( + /// An item in an enumeration (ordered list): `1. ...`. + Enum => EnumNode +); + +impl EnumNode { + /// The contents of the list item. 
+ pub fn body(&self) -> Markup { + self.0.cast_first_child().expect("enumeration node is missing body") + } + /// The number, if any. - pub number: Option, - /// The contents of the list item. - pub body: Markup, + pub fn number(&self) -> EnumNumber { + self.0.cast_first_child().expect("enumeration node is missing number") + } +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct EnumNumber(pub Option); + +impl TypedNode for EnumNumber { + fn cast_from(node: RedTicket) -> Option { + if let NodeKind::EnumNumbering(x) = node.kind() { + Some(Self(*x)) + } else { + None + } + } } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 8dbb108de..88757f8ea 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -6,7 +6,11 @@ mod markup; mod pretty; mod span; mod token; -pub mod visit; + +use std::fmt; +use std::fmt::{Debug, Display, Formatter}; +use std::mem; +use std::rc::Rc; pub use expr::*; pub use ident::*; @@ -14,3 +18,685 @@ pub use markup::*; pub use pretty::*; pub use span::*; pub use token::*; + +use crate::geom::{AngularUnit, LengthUnit}; +use crate::source::SourceId; +use crate::util::EcoString; + +#[derive(Debug, Clone, PartialEq)] +pub enum NodeKind { + /// A left square bracket: `[`. + LeftBracket, + /// A right square bracket: `]`. + RightBracket, + /// A left curly brace: `{`. + LeftBrace, + /// A right curly brace: `}`. + RightBrace, + /// A left round parenthesis: `(`. + LeftParen, + /// A right round parenthesis: `)`. + RightParen, + /// An asterisk: `*`. + Star, + /// A comma: `,`. + Comma, + /// A semicolon: `;`. + Semicolon, + /// A colon: `:`. + Colon, + /// A plus: `+`. + Plus, + /// A hyphen: `-`. + Minus, + /// A slash: `/`. + Slash, + /// A single equals sign: `=`. + Eq, + /// Two equals signs: `==`. + EqEq, + /// An exclamation mark followed by an equals sign: `!=`. + ExclEq, + /// A less-than sign: `<`. + Lt, + /// A less-than sign followed by an equals sign: `<=`. + LtEq, + /// A greater-than sign: `>`. 
+ Gt, + /// A greater-than sign followed by an equals sign: `>=`. + GtEq, + /// A plus followed by an equals sign: `+=`. + PlusEq, + /// A hyphen followed by an equals sign: `-=`. + HyphEq, + /// An asterisk followed by an equals sign: `*=`. + StarEq, + /// A slash followed by an equals sign: `/=`. + SlashEq, + /// Two dots: `..`. + Dots, + /// An equals sign followed by a greater-than sign: `=>`. + Arrow, + /// The `not` operator. + Not, + /// The `and` operator. + And, + /// The `or` operator. + Or, + /// The `with` operator. + With, + /// The `with` expression: `with (1)`. + WithExpr, + /// The none literal: `none`. + None, + /// The auto literal: `auto`. + Auto, + /// The `let` keyword. + Let, + /// The `if` keyword. + If, + /// The `else` keyword. + Else, + /// The `for` keyword. + For, + /// The `in` keyword. + In, + /// The `while` keyword. + While, + /// The `break` keyword. + Break, + /// The `continue` keyword. + Continue, + /// The `return` keyword. + Return, + /// The `import` keyword. + Import, + /// The `include` keyword. + Include, + /// The `from` keyword. + From, + /// One or more whitespace characters. + Space(usize), + /// A consecutive non-markup string. + Text(EcoString), + /// A slash and the letter "u" followed by a hexadecimal unicode entity + /// enclosed in curly braces: `\u{1F5FA}`. + UnicodeEscape(UnicodeEscapeToken), + /// An arbitrary number of backticks followed by inner contents, terminated + /// with the same number of backticks: `` `...` ``. + Raw(RawToken), + /// Dollar signs surrounding inner contents. + Math(MathToken), + /// A numbering: `23.`. + /// + /// Can also exist without the number: `.`. + EnumNumbering(Option), + /// An identifier: `center`. + Ident(EcoString), + /// A boolean: `true`, `false`. + Bool(bool), + /// An integer: `120`. + Int(i64), + /// A floating-point number: `1.2`, `10e-4`. + Float(f64), + /// A length: `12pt`, `3cm`. + Length(f64, LengthUnit), + /// An angle: `90deg`. 
+ Angle(f64, AngularUnit), + /// A percentage: `50%`. + /// + /// _Note_: `50%` is stored as `50.0` here, as in the corresponding + /// [literal](super::Lit::Percent). + Percentage(f64), + /// A fraction unit: `3fr`. + Fraction(f64), + /// A quoted string: `"..."`. + Str(StrToken), + /// Two slashes followed by inner contents, terminated with a newline: + /// `//\n`. + LineComment, + /// A slash and a star followed by inner contents, terminated with a star + /// and a slash: `/**/`. + /// + /// The comment can contain nested block comments. + BlockComment, + /// A node that should never appear in a finished tree. + Never, + /// Tokens that appear in the wrong place. + Error(ErrorPosition, EcoString), + /// Template markup. + Markup, + /// A forced line break: `\`. + Linebreak, + /// A paragraph break: Two or more newlines. + Parbreak, + /// Strong text was enabled / disabled: `*`. + Strong, + /// Emphasized text was enabled / disabled: `_`. + Emph, + /// A non-breaking space: `~`. + NonBreakingSpace, + /// An en-dash: `--`. + EnDash, + /// An em-dash: `---`. + EmDash, + /// A section heading: `= Introduction`. + Heading, + /// A heading's level: `=`, `==`, `===`, etc. + HeadingLevel(u8), + /// An item in an unordered list: `- ...`. + List, + /// The bullet character of an item in an unordered list: `-`. + ListBullet, + /// An item in an enumeration (ordered list): `1. ...`. + Enum, + /// An array expression: `(1, "hi", 12cm)`. + Array, + /// A dictionary expression: `(thickness: 3pt, pattern: dashed)`. + Dict, + /// A named argument: `thickness: 3pt`. + Named, + /// A template expression: `[*Hi* there!]`. + Template, + /// A grouped expression: `(1 + 2)`. + Group, + /// A block expression: `{ let x = 1; x + 2 }`. + Block, + /// A unary operation: `-x`. + Unary, + /// A binary operation: `a + b`. + Binary, + /// An invocation of a function: `f(x, y)`. + Call, + /// A function call's argument list: `(x, y)`. + CallArgs, + /// A closure expression: `(x, y) => z`. 
+ Closure, + /// A closure's parameters: `(x, y)`. + ClosureParams, + /// A parameter sink: `..x`. + ParameterSink, + /// A for loop expression: `for x in y { ... }`. + ForExpr, + /// A while loop expression: `while x { ... }`. + WhileExpr, + /// An if expression: `if x { ... }`. + IfExpr, + /// A let expression: `let x = 1`. + LetExpr, + /// A for loop's destructuring pattern: `x` or `x, y`. + ForPattern, + /// The import expression: `import x from "foo.typ"`. + ImportExpr, + /// Items to import: `a, b, c`. + ImportItems, + /// The include expression: `include "foo.typ"`. + IncludeExpr, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ErrorPosition { + /// At the start of the node. + Start, + /// Over the full width of the node. + Full, + /// At the end of the node. + End, +} + +impl Display for NodeKind { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.pad(match self { + Self::LeftBracket => "opening bracket", + Self::RightBracket => "closing bracket", + Self::LeftBrace => "opening brace", + Self::RightBrace => "closing brace", + Self::LeftParen => "opening paren", + Self::RightParen => "closing paren", + Self::Star => "star", + Self::Comma => "comma", + Self::Semicolon => "semicolon", + Self::Colon => "colon", + Self::Plus => "plus", + Self::Minus => "minus", + Self::Slash => "slash", + Self::Eq => "assignment operator", + Self::EqEq => "equality operator", + Self::ExclEq => "inequality operator", + Self::Lt => "less-than operator", + Self::LtEq => "less-than or equal operator", + Self::Gt => "greater-than operator", + Self::GtEq => "greater-than or equal operator", + Self::PlusEq => "add-assign operator", + Self::HyphEq => "subtract-assign operator", + Self::StarEq => "multiply-assign operator", + Self::SlashEq => "divide-assign operator", + Self::Dots => "dots", + Self::Arrow => "arrow", + Self::Not => "operator `not`", + Self::And => "operator `and`", + Self::Or => "operator `or`", + Self::With => "operator `with`", + 
Self::WithExpr => "`with` expression", + Self::None => "`none`", + Self::Auto => "`auto`", + Self::Let => "keyword `let`", + Self::If => "keyword `if`", + Self::Else => "keyword `else`", + Self::For => "keyword `for`", + Self::In => "keyword `in`", + Self::While => "keyword `while`", + Self::Break => "keyword `break`", + Self::Continue => "keyword `continue`", + Self::Return => "keyword `return`", + Self::Import => "keyword `import`", + Self::Include => "keyword `include`", + Self::From => "keyword `from`", + Self::Space(_) => "space", + Self::Math(_) => "math formula", + Self::EnumNumbering(_) => "numbering", + Self::Str(_) => "string", + Self::Never => "a node that should not be here", + Self::LineComment => "line comment", + Self::BlockComment => "block comment", + Self::Markup => "markup", + Self::Linebreak => "forced linebreak", + Self::Parbreak => "paragraph break", + Self::Strong => "strong", + Self::Emph => "emphasis", + Self::Text(_) => "text", + Self::NonBreakingSpace => "non-breaking space", + Self::EnDash => "en dash", + Self::EmDash => "em dash", + Self::UnicodeEscape(_) => "unicode escape sequence", + Self::Raw(_) => "raw block", + Self::Heading => "heading", + Self::HeadingLevel(_) => "heading level", + Self::List => "list", + Self::ListBullet => "list bullet", + Self::Enum => "enum", + Self::Ident(_) => "identifier", + Self::Bool(_) => "boolean", + Self::Int(_) => "integer", + Self::Float(_) => "float", + Self::Length(_, _) => "length", + Self::Angle(_, _) => "angle", + Self::Percentage(_) => "percentage", + Self::Fraction(_) => "`fr` value", + Self::Array => "array", + Self::Dict => "dictionary", + Self::Named => "named argument", + Self::Template => "template", + Self::Group => "group", + Self::Block => "block", + Self::Unary => "unary expression", + Self::Binary => "binary expression", + Self::Call => "call", + Self::CallArgs => "call arguments", + Self::Closure => "closure", + Self::ClosureParams => "closure parameters", + Self::ParameterSink => 
"parameter sink", + Self::ForExpr => "for-loop expression", + Self::WhileExpr => "while-loop expression", + Self::IfExpr => "if expression", + Self::LetExpr => "let expression", + Self::ForPattern => "for-loop destructuring pattern", + Self::ImportExpr => "import expression", + Self::ImportItems => "import items", + Self::IncludeExpr => "include expression", + Self::Error(_, src) => match src.as_str() { + "*/" => "end of block comment", + _ => "invalid token", + }, + }) + } +} + +impl NodeKind { + pub fn is_parenthesis(&self) -> bool { + match self { + Self::LeftParen => true, + Self::RightParen => true, + _ => false, + } + } + + pub fn is_bracket(&self) -> bool { + match self { + Self::LeftBracket => true, + Self::RightBracket => true, + _ => false, + } + } + + pub fn is_brace(&self) -> bool { + match self { + Self::LeftBrace => true, + Self::RightBrace => true, + _ => false, + } + } + + pub fn is_error(&self) -> bool { + matches!(self, NodeKind::Never | NodeKind::Error(_, _)) + } +} + +/// A syntactical node. +#[derive(Clone, PartialEq)] +pub struct GreenNode { + /// Node metadata. + meta: GreenData, + /// This node's children, losslessly make up this node. + children: Vec, +} + +/// Data shared between [`GreenNode`]s and [`GreenToken`]s. +#[derive(Clone, PartialEq)] +pub struct GreenData { + /// What kind of node this is (each kind would have its own struct in a + /// strongly typed AST). + kind: NodeKind, + /// The byte length of the node in the source. + len: usize, + /// Whether this node or any of its children are erroneous. + has_error: bool, +} + +impl GreenData { + pub fn new(kind: NodeKind, len: usize) -> Self { + Self { len, has_error: kind.is_error(), kind } + } + + pub fn kind(&self) -> &NodeKind { + &self.kind + } + + pub fn len(&self) -> usize { + self.len + } + + pub fn has_error(&self) -> bool { + self.has_error + } +} + +impl From for Green { + fn from(token: GreenData) -> Self { + Self::Token(token) + } +} + +/// Children of a [`GreenNode`]. 
+#[derive(Clone, PartialEq)] +pub enum Green { + /// A terminal owned token. + Token(GreenData), + /// A non-terminal node in an Rc. + Node(Rc), +} + +impl Green { + fn meta(&self) -> &GreenData { + match self { + Green::Token(t) => &t, + Green::Node(n) => &n.meta, + } + } + + pub fn kind(&self) -> &NodeKind { + self.meta().kind() + } + + pub fn len(&self) -> usize { + self.meta().len() + } + + pub fn has_error(&self) -> bool { + self.meta().has_error() + } + + pub fn children(&self) -> &[Green] { + match self { + Green::Token(_) => &[], + Green::Node(n) => &n.children(), + } + } +} + +impl GreenNode { + pub fn new(kind: NodeKind, len: usize) -> Self { + Self { + meta: GreenData::new(kind, len), + children: Vec::new(), + } + } + + pub fn with_children( + kind: NodeKind, + len: usize, + children: impl Iterator>, + ) -> Self { + let mut meta = GreenData::new(kind, len); + let children = children + .map(|x| { + let x = x.into(); + meta.has_error |= x.has_error(); + x + }) + .collect(); + Self { meta, children } + } + + pub fn with_child(kind: NodeKind, len: usize, child: impl Into) -> Self { + Self::with_children(kind, len, std::iter::once(child.into())) + } + + pub fn children(&self) -> &[Green] { + &self.children + } +} + +impl From for Green { + fn from(node: GreenNode) -> Self { + Rc::new(node).into() + } +} + +impl From> for Green { + fn from(node: Rc) -> Self { + Self::Node(node) + } +} + +impl Default for Green { + fn default() -> Self { + Self::Token(GreenData::new(NodeKind::Never, 0)) + } +} + +impl Debug for Green { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{:?}: {}", self.kind(), self.len())?; + if let Self::Node(n) = self { + if !n.children.is_empty() { + f.write_str(" ")?; + f.debug_list().entries(&n.children).finish()?; + } + } + + Ok(()) + } +} + +#[derive(Copy, Clone, PartialEq)] +pub struct RedTicket<'a> { + id: SourceId, + offset: usize, + green: &'a Green, +} + +impl<'a> RedTicket<'a> { + pub fn own(self) -> RedNode { + 
RedNode { + id: self.id, + offset: self.offset, + green: self.green.clone(), + } + } + + pub fn kind(&self) -> &NodeKind { + self.green.kind() + } + + + pub fn cast(self) -> Option + where + T: TypedNode, + { + T::cast_from(self) + } +} + +#[derive(Clone, PartialEq)] +pub struct RedNode { + id: SourceId, + offset: usize, + green: Green, +} + +impl RedNode { + pub fn new_root(root: Rc, id: SourceId) -> Self { + Self { id, offset: 0, green: root.into() } + } + + pub fn span(&self) -> Span { + Span::new(self.id, self.offset, self.offset + self.green.len()) + } + + pub fn len(&self) -> usize { + self.green.len() + } + + pub fn kind(&self) -> &NodeKind { + self.green.kind() + } + + pub fn children<'a>(&'a self) -> impl Iterator> + Clone + 'a { + let children = match &self.green { + Green::Node(node) => node.children(), + Green::Token(_) => &[], + }; + + let mut offset = self.offset; + children.iter().map(move |green_child| { + let child_offset = offset; + offset += green_child.len(); + RedTicket { + id: self.id, + offset: child_offset, + green: &green_child, + } + }) + } + + pub fn has_error(&self) -> bool { + self.green.has_error() + } + + pub fn errors(&self) -> Vec<(Span, EcoString)> { + if !self.green.has_error() { + return vec![]; + } + + if let NodeKind::Error(pos, msg) = self.kind() { + let span = match pos { + ErrorPosition::Start => self.span().at_start(), + ErrorPosition::Full => self.span(), + ErrorPosition::End => self.span().at_end(), + }; + + vec![(span, msg.clone())] + } else if let NodeKind::Never = self.kind() { + vec![(self.span(), "found a never node".into())] + } else { + self.children() + .filter(|ticket| ticket.green.has_error()) + .flat_map(|ticket| ticket.own().errors()) + .collect() + } + } + + pub fn ticket<'a>(&'a self) -> RedTicket<'a> { + RedTicket { + id: self.id, + offset: self.offset, + green: &self.green, + } + } + + pub(crate) fn typed_child(&self, kind: &NodeKind) -> Option { + self.children() + .find(|x| mem::discriminant(x.kind()) == 
mem::discriminant(kind)) + .map(RedTicket::own) + } + + pub(crate) fn cast_first_child(&self) -> Option { + self.children().find_map(RedTicket::cast) + } + + pub(crate) fn cast_last_child(&self) -> Option { + self.children().filter_map(RedTicket::cast).last() + } +} + +impl Debug for RedNode { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{:?}: {:?}", self.kind(), self.span())?; + let children = self.children().collect::>(); + if !children.is_empty() { + f.write_str(" ")?; + f.debug_list() + .entries(children.into_iter().map(RedTicket::own)) + .finish()?; + } + Ok(()) + } +} + +pub trait TypedNode: Sized { + /// Performs the conversion. + fn cast_from(value: RedTicket) -> Option; +} + +#[macro_export] +macro_rules! node { + (#[doc = $doc:expr] $name:ident) => { + node!(#[doc = $doc] $name => $name); + }; + (#[doc = $doc:expr] $variant:ident => $name:ident) => { + #[doc = $doc] + #[derive(Debug, Clone, PartialEq)] + pub struct $name(RedNode); + + impl TypedNode for $name { + fn cast_from(node: RedTicket) -> Option { + if node.kind() != &NodeKind::$variant { + return None; + } + + Some(Self(node.own())) + } + } + + impl $name { + pub fn span(&self) -> Span { + self.0.span() + } + + pub fn underlying(&self) -> RedTicket { + self.0.ticket() + } + } + }; +} diff --git a/src/syntax/pretty.rs b/src/syntax/pretty.rs index 3d02f39f7..b1c7e02bd 100644 --- a/src/syntax/pretty.rs +++ b/src/syntax/pretty.rs @@ -88,10 +88,10 @@ impl Pretty for MarkupNode { match self { // TODO: Handle escaping. 
Self::Space => p.push(' '), - Self::Linebreak(_) => p.push_str(r"\"), - Self::Parbreak(_) => p.push_str("\n\n"), - Self::Strong(_) => p.push('*'), - Self::Emph(_) => p.push('_'), + Self::Linebreak => p.push_str(r"\"), + Self::Parbreak => p.push_str("\n\n"), + Self::Strong => p.push('*'), + Self::Emph => p.push('_'), Self::Text(text) => p.push_str(text), Self::Raw(raw) => raw.pretty(p), Self::Heading(heading) => heading.pretty(p), @@ -165,28 +165,28 @@ impl Pretty for RawNode { impl Pretty for HeadingNode { fn pretty(&self, p: &mut Printer) { - for _ in 0 .. self.level { + for _ in 0 .. self.level().0 { p.push('='); } p.push(' '); - self.body.pretty(p); + self.body().pretty(p); } } impl Pretty for ListNode { fn pretty(&self, p: &mut Printer) { p.push_str("- "); - self.body.pretty(p); + self.body().pretty(p); } } impl Pretty for EnumNode { fn pretty(&self, p: &mut Printer) { - if let Some(number) = self.number { + if let Some(number) = self.number().0 { write!(p, "{}", number).unwrap(); } p.push_str(". 
"); - self.body.pretty(p); + self.body().pretty(p); } } @@ -235,8 +235,10 @@ impl Pretty for Lit { impl Pretty for ArrayExpr { fn pretty(&self, p: &mut Printer) { p.push('('); - p.join(&self.items, ", ", |item, p| item.pretty(p)); - if self.items.len() == 1 { + + let items = self.items(); + p.join(&items, ", ", |item, p| item.pretty(p)); + if items.len() == 1 { p.push(','); } p.push(')'); @@ -246,10 +248,12 @@ impl Pretty for ArrayExpr { impl Pretty for DictExpr { fn pretty(&self, p: &mut Printer) { p.push('('); - if self.items.is_empty() { + + let items = self.items(); + if items.is_empty() { p.push(':'); } else { - p.join(&self.items, ", ", |named, p| named.pretty(p)); + p.join(&items, ", ", |named, p| named.pretty(p)); } p.push(')'); } @@ -257,16 +261,16 @@ impl Pretty for DictExpr { impl Pretty for Named { fn pretty(&self, p: &mut Printer) { - self.name.pretty(p); + self.name().pretty(p); p.push_str(": "); - self.expr.pretty(p); + self.expr().pretty(p); } } impl Pretty for TemplateExpr { fn pretty(&self, p: &mut Printer) { p.push('['); - self.body.pretty(p); + self.body().pretty(p); p.push(']'); } } @@ -274,7 +278,7 @@ impl Pretty for TemplateExpr { impl Pretty for GroupExpr { fn pretty(&self, p: &mut Printer) { p.push('('); - self.expr.pretty(p); + self.expr().pretty(p); p.push(')'); } } @@ -282,11 +286,13 @@ impl Pretty for GroupExpr { impl Pretty for BlockExpr { fn pretty(&self, p: &mut Printer) { p.push('{'); - if self.exprs.len() > 1 { + + let exprs = self.exprs(); + if exprs.len() > 1 { p.push(' '); } - p.join(&self.exprs, "; ", |expr, p| expr.pretty(p)); - if self.exprs.len() > 1 { + p.join(&exprs, "; ", |expr, p| expr.pretty(p)); + if exprs.len() > 1 { p.push(' '); } p.push('}'); @@ -295,11 +301,12 @@ impl Pretty for BlockExpr { impl Pretty for UnaryExpr { fn pretty(&self, p: &mut Printer) { - self.op.pretty(p); - if self.op == UnOp::Not { + let op = self.op(); + op.pretty(p); + if op == UnOp::Not { p.push(' '); } - self.expr.pretty(p); + 
self.expr().pretty(p); } } @@ -311,11 +318,11 @@ impl Pretty for UnOp { impl Pretty for BinaryExpr { fn pretty(&self, p: &mut Printer) { - self.lhs.pretty(p); + self.lhs().pretty(p); p.push(' '); - self.op.pretty(p); + self.op().pretty(p); p.push(' '); - self.rhs.pretty(p); + self.rhs().pretty(p); } } @@ -327,7 +334,7 @@ impl Pretty for BinOp { impl Pretty for CallExpr { fn pretty(&self, p: &mut Printer) { - self.callee.pretty(p); + self.callee().pretty(p); let mut write_args = |items: &[CallArg]| { p.push('('); @@ -335,25 +342,26 @@ impl Pretty for CallExpr { p.push(')'); }; - match self.args.items.as_slice() { - // This can be moved behind the arguments. - // - // Example: Transforms "#v(a, [b])" => "#v(a)[b]". - [head @ .., CallArg::Pos(Expr::Template(template))] => { - if !head.is_empty() { - write_args(head); - } - template.pretty(p); - } + let arg_list = self.args(); + let args = arg_list.items(); - items => write_args(items), + if let Some(Expr::Template(template)) = args + .last() + .and_then(|x| if let CallArg::Pos(arg) = x { Some(arg) } else { None }) + { + if args.len() > 1 { + write_args(&args[0 .. 
args.len() - 1]); + } + template.pretty(p); + } else { + write_args(&args); } } } impl Pretty for CallArgs { fn pretty(&self, p: &mut Printer) { - p.join(&self.items, ", ", |item, p| item.pretty(p)); + p.join(&self.items(), ", ", |item, p| item.pretty(p)); } } @@ -372,15 +380,15 @@ impl Pretty for CallArg { impl Pretty for ClosureExpr { fn pretty(&self, p: &mut Printer) { - if let [param] = self.params.as_slice() { + if let [param] = self.params().as_slice() { param.pretty(p); } else { p.push('('); - p.join(self.params.iter(), ", ", |item, p| item.pretty(p)); + p.join(self.params().iter(), ", ", |item, p| item.pretty(p)); p.push(')'); } p.push_str(" => "); - self.body.pretty(p); + self.body().pretty(p); } } @@ -399,9 +407,9 @@ impl Pretty for ClosureParam { impl Pretty for WithExpr { fn pretty(&self, p: &mut Printer) { - self.callee.pretty(p); + self.callee().pretty(p); p.push_str(" with ("); - self.args.pretty(p); + self.args().pretty(p); p.push(')'); } } @@ -409,13 +417,13 @@ impl Pretty for WithExpr { impl Pretty for LetExpr { fn pretty(&self, p: &mut Printer) { p.push_str("let "); - self.binding.pretty(p); - if let Some(Expr::Closure(closure)) = &self.init { + self.binding().pretty(p); + if let Some(Expr::Closure(closure)) = &self.init() { p.push('('); - p.join(closure.params.iter(), ", ", |item, p| item.pretty(p)); + p.join(closure.params().iter(), ", ", |item, p| item.pretty(p)); p.push_str(") = "); - closure.body.pretty(p); - } else if let Some(init) = &self.init { + closure.body().pretty(p); + } else if let Some(init) = &self.init() { p.push_str(" = "); init.pretty(p); } @@ -425,10 +433,10 @@ impl Pretty for LetExpr { impl Pretty for IfExpr { fn pretty(&self, p: &mut Printer) { p.push_str("if "); - self.condition.pretty(p); + self.condition().pretty(p); p.push(' '); - self.if_body.pretty(p); - if let Some(expr) = &self.else_body { + self.if_body().pretty(p); + if let Some(expr) = &self.else_body() { p.push_str(" else "); expr.pretty(p); } @@ -438,42 +446,40 
@@ impl Pretty for IfExpr { impl Pretty for WhileExpr { fn pretty(&self, p: &mut Printer) { p.push_str("while "); - self.condition.pretty(p); + self.condition().pretty(p); p.push(' '); - self.body.pretty(p); + self.body().pretty(p); } } impl Pretty for ForExpr { fn pretty(&self, p: &mut Printer) { p.push_str("for "); - self.pattern.pretty(p); + self.pattern().pretty(p); p.push_str(" in "); - self.iter.pretty(p); + self.iter().pretty(p); p.push(' '); - self.body.pretty(p); + self.body().pretty(p); } } impl Pretty for ForPattern { fn pretty(&self, p: &mut Printer) { - match self { - Self::Value(v) => v.pretty(p), - Self::KeyValue(k, v) => { - k.pretty(p); - p.push_str(", "); - v.pretty(p); - } + if let Some(key) = self.key() { + key.pretty(p); + p.push_str(", "); } + + self.value().pretty(p); } } impl Pretty for ImportExpr { fn pretty(&self, p: &mut Printer) { p.push_str("import "); - self.imports.pretty(p); + self.imports().pretty(p); p.push_str(" from "); - self.path.pretty(p); + self.path().pretty(p); } } @@ -489,7 +495,7 @@ impl Pretty for Imports { impl Pretty for IncludeExpr { fn pretty(&self, p: &mut Printer) { p.push_str("include "); - self.path.pretty(p); + self.path().pretty(p); } } @@ -502,7 +508,6 @@ impl Pretty for Ident { #[cfg(test)] mod tests { use super::*; - use crate::parse::parse; use crate::source::SourceFile; #[track_caller] @@ -513,7 +518,7 @@ mod tests { #[track_caller] fn test_parse(src: &str, expected: &str) { let source = SourceFile::detached(src); - let ast = parse(&source).unwrap(); + let ast: Markup = source.ast().unwrap(); let found = pretty(&ast); if found != expected { println!("tree: {:#?}", ast); diff --git a/src/syntax/span.rs b/src/syntax/span.rs index bfb9e755c..ee7cba4c2 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -109,6 +109,11 @@ impl Span { *self = self.join(other) } + /// Test whether a position is within the span. 
+ pub fn contains_pos(&self, pos: Pos) -> bool { + self.start <= pos && self.end >= pos + } + /// Test whether one span complete contains the other span. pub fn contains(self, other: Self) -> bool { self.source == other.source && self.start <= other.start && self.end >= other.end @@ -118,6 +123,16 @@ impl Span { pub fn to_range(self) -> Range { self.start.to_usize() .. self.end.to_usize() } + + /// A new span at the position of this span's start. + pub fn at_start(&self) -> Span { + Self::at(self.source, self.start) + } + + /// A new span at the position of this span's end. + pub fn at_end(&self) -> Span { + Self::at(self.source, self.end) + } } impl Debug for Span { diff --git a/src/syntax/token.rs b/src/syntax/token.rs index 22dd104b0..49613667e 100644 --- a/src/syntax/token.rs +++ b/src/syntax/token.rs @@ -1,188 +1,38 @@ -use crate::geom::{AngularUnit, LengthUnit}; - -/// A minimal semantic entity of source code. -#[derive(Debug, Copy, Clone, PartialEq)] -pub enum Token<'s> { - /// A left square bracket: `[`. - LeftBracket, - /// A right square bracket: `]`. - RightBracket, - /// A left curly brace: `{`. - LeftBrace, - /// A right curly brace: `}`. - RightBrace, - /// A left round parenthesis: `(`. - LeftParen, - /// A right round parenthesis: `)`. - RightParen, - /// An asterisk: `*`. - Star, - /// An underscore: `_`. - Underscore, - /// A tilde: `~`. - Tilde, - /// Two hyphens: `--`. - HyphHyph, - /// Three hyphens: `---`. - HyphHyphHyph, - /// A backslash followed by nothing or whitespace: `\`. - Backslash, - /// A comma: `,`. - Comma, - /// A semicolon: `;`. - Semicolon, - /// A colon: `:`. - Colon, - /// A plus: `+`. - Plus, - /// A hyphen: `-`. - Hyph, - /// A slash: `/`. - Slash, - /// A single equals sign: `=`. - Eq, - /// Two equals signs: `==`. - EqEq, - /// An exclamation mark followed by an equals sign: `!=`. - ExclEq, - /// A less-than sign: `<`. - Lt, - /// A less-than sign followed by an equals sign: `<=`. - LtEq, - /// A greater-than sign: `>`. 
- Gt, - /// A greater-than sign followed by an equals sign: `>=`. - GtEq, - /// A plus followed by an equals sign: `+=`. - PlusEq, - /// A hyphen followed by an equals sign: `-=`. - HyphEq, - /// An asterisk followed by an equals sign: `*=`. - StarEq, - /// A slash followed by an equals sign: `/=`. - SlashEq, - /// Two dots: `..`. - Dots, - /// An equals sign followed by a greater-than sign: `=>`. - Arrow, - /// The `not` operator. - Not, - /// The `and` operator. - And, - /// The `or` operator. - Or, - /// The `with` operator. - With, - /// The none literal: `none`. - None, - /// The auto literal: `auto`. - Auto, - /// The `let` keyword. - Let, - /// The `if` keyword. - If, - /// The `else` keyword. - Else, - /// The `for` keyword. - For, - /// The `in` keyword. - In, - /// The `while` keyword. - While, - /// The `break` keyword. - Break, - /// The `continue` keyword. - Continue, - /// The `return` keyword. - Return, - /// The `import` keyword. - Import, - /// The `include` keyword. - Include, - /// The `from` keyword. - From, - /// One or more whitespace characters. - /// - /// The contained `usize` denotes the number of newlines that were contained - /// in the whitespace. - Space(usize), - /// A consecutive non-markup string. - Text(&'s str), - /// A slash and the letter "u" followed by a hexadecimal unicode entity - /// enclosed in curly braces: `\u{1F5FA}`. - UnicodeEscape(UnicodeEscapeToken<'s>), - /// An arbitrary number of backticks followed by inner contents, terminated - /// with the same number of backticks: `` `...` ``. - Raw(RawToken<'s>), - /// One or two dollar signs followed by inner contents, terminated with the - /// same number of dollar signs. - Math(MathToken<'s>), - /// A numbering: `23.`. - /// - /// Can also exist without the number: `.`. - Numbering(Option), - /// An identifier: `center`. - Ident(&'s str), - /// A boolean: `true`, `false`. - Bool(bool), - /// An integer: `120`. - Int(i64), - /// A floating-point number: `1.2`, `10e-4`. 
- Float(f64), - /// A length: `12pt`, `3cm`. - Length(f64, LengthUnit), - /// An angle: `90deg`. - Angle(f64, AngularUnit), - /// A percentage: `50%`. - /// - /// _Note_: `50%` is stored as `50.0` here, as in the corresponding - /// [literal](super::Lit::Percent). - Percent(f64), - /// A fraction unit: `3fr`. - Fraction(f64), - /// A quoted string: `"..."`. - Str(StrToken<'s>), - /// Two slashes followed by inner contents, terminated with a newline: - /// `//\n`. - LineComment(&'s str), - /// A slash and a star followed by inner contents, terminated with a star - /// and a slash: `/**/`. - /// - /// The comment can contain nested block comments. - BlockComment(&'s str), - /// Things that are not valid tokens. - Invalid(&'s str), -} +use crate::util::EcoString; /// A quoted string token: `"..."`. -#[derive(Debug, Copy, Clone, PartialEq)] -pub struct StrToken<'s> { +#[derive(Debug, Clone, PartialEq)] +pub struct StrToken { /// The string inside the quotes. /// /// _Note_: If the string contains escape sequences these are not yet /// applied to be able to just store a string slice here instead of /// a `String`. The resolving is done later in the parser. - pub string: &'s str, + pub string: EcoString, /// Whether the closing quote was present. pub terminated: bool, } /// A raw block token: `` `...` ``. -#[derive(Debug, Copy, Clone, PartialEq)] -pub struct RawToken<'s> { - /// The raw text between the backticks. - pub text: &'s str, +#[derive(Debug, Clone, PartialEq)] +pub struct RawToken { + /// The raw text in the block. + pub text: EcoString, + /// The programming language of the raw text. + pub lang: Option, /// The number of opening backticks. - pub backticks: usize, + pub backticks: u8, /// Whether all closing backticks were present. pub terminated: bool, + /// Whether to display this as a block. + pub block: bool, } /// A math formula token: `$2pi + x$` or `$[f'(x) = x^2]$`. 
-#[derive(Debug, Copy, Clone, PartialEq)] -pub struct MathToken<'s> { +#[derive(Debug, Clone, PartialEq)] +pub struct MathToken { /// The formula between the dollars. - pub formula: &'s str, + pub formula: EcoString, /// Whether the formula is display-level, that is, it is surrounded by /// `$[..]`. pub display: bool, @@ -191,86 +41,21 @@ pub struct MathToken<'s> { } /// A unicode escape sequence token: `\u{1F5FA}`. -#[derive(Debug, Copy, Clone, PartialEq)] -pub struct UnicodeEscapeToken<'s> { +#[derive(Debug, Clone, PartialEq)] +pub struct UnicodeEscapeToken { /// The escape sequence between the braces. - pub sequence: &'s str, + pub sequence: EcoString, + /// The resulting unicode character. + pub character: Option, /// Whether the closing brace was present. pub terminated: bool, } -impl<'s> Token<'s> { - /// The English name of this token for use in error messages. - pub fn name(self) -> &'static str { - match self { - Self::LeftBracket => "opening bracket", - Self::RightBracket => "closing bracket", - Self::LeftBrace => "opening brace", - Self::RightBrace => "closing brace", - Self::LeftParen => "opening paren", - Self::RightParen => "closing paren", - Self::Star => "star", - Self::Underscore => "underscore", - Self::Tilde => "tilde", - Self::HyphHyph => "en dash", - Self::HyphHyphHyph => "em dash", - Self::Backslash => "backslash", - Self::Comma => "comma", - Self::Semicolon => "semicolon", - Self::Colon => "colon", - Self::Plus => "plus", - Self::Hyph => "minus", - Self::Slash => "slash", - Self::Eq => "assignment operator", - Self::EqEq => "equality operator", - Self::ExclEq => "inequality operator", - Self::Lt => "less-than operator", - Self::LtEq => "less-than or equal operator", - Self::Gt => "greater-than operator", - Self::GtEq => "greater-than or equal operator", - Self::PlusEq => "add-assign operator", - Self::HyphEq => "subtract-assign operator", - Self::StarEq => "multiply-assign operator", - Self::SlashEq => "divide-assign operator", - Self::Dots 
=> "dots", - Self::Arrow => "arrow", - Self::Not => "operator `not`", - Self::And => "operator `and`", - Self::Or => "operator `or`", - Self::With => "operator `with`", - Self::None => "`none`", - Self::Auto => "`auto`", - Self::Let => "keyword `let`", - Self::If => "keyword `if`", - Self::Else => "keyword `else`", - Self::For => "keyword `for`", - Self::In => "keyword `in`", - Self::While => "keyword `while`", - Self::Break => "keyword `break`", - Self::Continue => "keyword `continue`", - Self::Return => "keyword `return`", - Self::Import => "keyword `import`", - Self::Include => "keyword `include`", - Self::From => "keyword `from`", - Self::Space(_) => "space", - Self::Text(_) => "text", - Self::UnicodeEscape(_) => "unicode escape sequence", - Self::Raw(_) => "raw block", - Self::Math(_) => "math formula", - Self::Numbering(_) => "numbering", - Self::Ident(_) => "identifier", - Self::Bool(_) => "boolean", - Self::Int(_) => "integer", - Self::Float(_) => "float", - Self::Length(_, _) => "length", - Self::Angle(_, _) => "angle", - Self::Percent(_) => "percentage", - Self::Fraction(_) => "`fr` value", - Self::Str(_) => "string", - Self::LineComment(_) => "line comment", - Self::BlockComment(_) => "block comment", - Self::Invalid("*/") => "end of block comment", - Self::Invalid(_) => "invalid token", - } - } +/// A unit-bound number token: `1.2em`. +#[derive(Debug, Clone, PartialEq)] +pub struct UnitToken { + /// The number part. + pub number: std::ops::Range, + /// The unit part. + pub unit: std::ops::Range, } diff --git a/src/syntax/visit.rs b/src/syntax/visit.rs deleted file mode 100644 index 40e8eb93c..000000000 --- a/src/syntax/visit.rs +++ /dev/null @@ -1,263 +0,0 @@ -//! Mutable and immutable syntax tree traversal. - -use super::*; - -/// Implement the immutable and the mutable visitor version. -macro_rules! impl_visitors { - ($($name:ident($($tts:tt)*) $body:block)*) => { - macro_rules! 
r { - (rc: $x:expr) => { $x.as_ref() }; - ($x:expr) => { &$x }; - } - - impl_visitor! { - Visit, - immutable, - immutably, - [$(($name($($tts)*) $body))*] - } - - macro_rules! r { - (rc: $x:expr) => { std::rc::Rc::make_mut(&mut $x) }; - ($x:expr) => { &mut $x }; - } - - impl_visitor! { - VisitMut, - mutable, - mutably, - [$(($name($($tts)*) $body mut))*] mut - } - }; -} - -/// Implement an immutable or mutable visitor. -macro_rules! impl_visitor { - ( - $visit:ident, - $mutability:ident, - $adjective:ident, - [$(( - $name:ident($v:ident, $node:ident: $ty:ty) - $body:block - $($fmut:tt)? - ))*] - $($mut:tt)? - ) => { - #[doc = concat!("Visit syntax trees ", stringify!($adjective), ".")] - pub trait $visit<'ast> { - /// Visit a definition of a binding. - /// - /// Bindings are, for example, left-hand side of let expressions, - /// and key/value patterns in for loops. - fn visit_binding(&mut self, _: &'ast $($mut)? Ident) {} - - /// Visit the entry into a scope. - fn visit_enter(&mut self) {} - - /// Visit the exit from a scope. - fn visit_exit(&mut self) {} - - $(fn $name(&mut self, $node: &'ast $($fmut)? $ty) { - $mutability::$name(self, $node); - })* - } - - #[doc = concat!("Visitor functions that are ", stringify!($mutability), ".")] - pub mod $mutability { - use super::*; - $( - #[doc = concat!("Visit a node of type [`", stringify!($ty), "`].")] - pub fn $name<'ast, V>($v: &mut V, $node: &'ast $($fmut)? $ty) - where - V: $visit<'ast> + ?Sized - $body - )* - } - }; -} - -impl_visitors! 
{ - visit_tree(v, markup: Markup) { - for node in markup { - v.visit_node(node); - } - } - - visit_node(v, node: MarkupNode) { - match node { - MarkupNode::Space => {} - MarkupNode::Linebreak(_) => {} - MarkupNode::Parbreak(_) => {} - MarkupNode::Strong(_) => {} - MarkupNode::Emph(_) => {} - MarkupNode::Text(_) => {} - MarkupNode::Raw(_) => {} - MarkupNode::Heading(n) => v.visit_heading(n), - MarkupNode::List(n) => v.visit_list(n), - MarkupNode::Enum(n) => v.visit_enum(n), - MarkupNode::Expr(n) => v.visit_expr(n), - } - } - - visit_heading(v, heading: HeadingNode) { - v.visit_tree(r!(heading.body)); - } - - visit_list(v, list: ListNode) { - v.visit_tree(r!(list.body)); - } - - visit_enum(v, enum_: EnumNode) { - v.visit_tree(r!(enum_.body)); - } - - visit_expr(v, expr: Expr) { - match expr { - Expr::Ident(_) => {} - Expr::Lit(_) => {}, - Expr::Array(e) => v.visit_array(e), - Expr::Dict(e) => v.visit_dict(e), - Expr::Template(e) => v.visit_template(e), - Expr::Group(e) => v.visit_group(e), - Expr::Block(e) => v.visit_block(e), - Expr::Unary(e) => v.visit_unary(e), - Expr::Binary(e) => v.visit_binary(e), - Expr::Call(e) => v.visit_call(e), - Expr::Closure(e) => v.visit_closure(e), - Expr::With(e) => v.visit_with(e), - Expr::Let(e) => v.visit_let(e), - Expr::If(e) => v.visit_if(e), - Expr::While(e) => v.visit_while(e), - Expr::For(e) => v.visit_for(e), - Expr::Import(e) => v.visit_import(e), - Expr::Include(e) => v.visit_include(e), - } - } - - visit_array(v, array: ArrayExpr) { - for expr in r!(array.items) { - v.visit_expr(expr); - } - } - - visit_dict(v, dict: DictExpr) { - for named in r!(dict.items) { - v.visit_expr(r!(named.expr)); - } - } - - visit_template(v, template: TemplateExpr) { - v.visit_enter(); - v.visit_tree(r!(template.body)); - v.visit_exit(); - } - - visit_group(v, group: GroupExpr) { - v.visit_expr(r!(group.expr)); - } - - visit_block(v, block: BlockExpr) { - v.visit_enter(); - for expr in r!(block.exprs) { - v.visit_expr(expr); - } - 
v.visit_exit(); - } - - visit_binary(v, binary: BinaryExpr) { - v.visit_expr(r!(binary.lhs)); - v.visit_expr(r!(binary.rhs)); - } - - visit_unary(v, unary: UnaryExpr) { - v.visit_expr(r!(unary.expr)); - } - - visit_call(v, call: CallExpr) { - v.visit_expr(r!(call.callee)); - v.visit_args(r!(call.args)); - } - - visit_args(v, args: CallArgs) { - for arg in r!(args.items) { - v.visit_arg(arg); - } - } - - visit_arg(v, arg: CallArg) { - match arg { - CallArg::Pos(expr) => v.visit_expr(expr), - CallArg::Named(named) => v.visit_expr(r!(named.expr)), - CallArg::Spread(expr) => v.visit_expr(expr), - } - } - - visit_closure(v, closure: ClosureExpr) { - for param in r!(closure.params) { - v.visit_param(param); - } - v.visit_expr(r!(rc: closure.body)); - } - - visit_param(v, param: ClosureParam) { - match param { - ClosureParam::Pos(binding) => v.visit_binding(binding), - ClosureParam::Named(named) => { - v.visit_binding(r!(named.name)); - v.visit_expr(r!(named.expr)); - } - ClosureParam::Sink(binding) => v.visit_binding(binding), - } - } - - visit_with(v, with_expr: WithExpr) { - v.visit_expr(r!(with_expr.callee)); - v.visit_args(r!(with_expr.args)); - } - - visit_let(v, let_expr: LetExpr) { - if let Some(init) = r!(let_expr.init) { - v.visit_expr(init); - } - v.visit_binding(r!(let_expr.binding)); - } - - visit_if(v, if_expr: IfExpr) { - v.visit_expr(r!(if_expr.condition)); - v.visit_expr(r!(if_expr.if_body)); - if let Some(body) = r!(if_expr.else_body) { - v.visit_expr(body); - } - } - - visit_while(v, while_expr: WhileExpr) { - v.visit_expr(r!(while_expr.condition)); - v.visit_expr(r!(while_expr.body)); - } - - visit_for(v, for_expr: ForExpr) { - v.visit_expr(r!(for_expr.iter)); - match r!(for_expr.pattern) { - ForPattern::Value(value) => v.visit_binding(value), - ForPattern::KeyValue(key, value) => { - v.visit_binding(key); - v.visit_binding(value); - } - } - v.visit_expr(r!(for_expr.body)); - } - - visit_import(v, import_expr: ImportExpr) { - 
v.visit_expr(r!(import_expr.path)); - if let Imports::Idents(idents) = r!(import_expr.imports) { - for ident in idents { - v.visit_binding(ident); - } - } - } - - visit_include(v, include_expr: IncludeExpr) { - v.visit_expr(r!(include_expr.path)); - } -} diff --git a/tests/typ/code/array.typ b/tests/typ/code/array.typ index df37dd454..44b8b5979 100644 --- a/tests/typ/code/array.typ +++ b/tests/typ/code/array.typ @@ -72,7 +72,7 @@ {(,1)} // Missing expression makes named pair incomplete, making this an empty array. -// Error: 5 expected expression +// Error: 3-5 expected expression, found named pair {(a:)} // Named pair after this is already identified as an array. diff --git a/tests/typ/code/call.typ b/tests/typ/code/call.typ index 2c16af1cf..95d75595d 100644 --- a/tests/typ/code/call.typ +++ b/tests/typ/code/call.typ @@ -72,7 +72,7 @@ // Error: 10-12 expected expression, found end of block comment #func(a:1*/) -// Error: 8 expected comma +// Error: 9 expected comma #func(1 2) // Error: 7-8 expected identifier diff --git a/tests/typ/code/dict.typ b/tests/typ/code/dict.typ index b369b8b65..757759aca 100644 --- a/tests/typ/code/dict.typ +++ b/tests/typ/code/dict.typ @@ -42,7 +42,7 @@ // Identified as dictionary due to initial colon. // Error: 4-5 expected named pair, found expression -// Error: 5 expected comma +// Error: 6 expected comma // Error: 12-16 expected identifier // Error: 17-18 expected expression, found colon {(:1 b:"", true::)} diff --git a/tests/typ/code/import.typ b/tests/typ/code/import.typ index bc96e80c8..1fa8f2057 100644 --- a/tests/typ/code/import.typ +++ b/tests/typ/code/import.typ @@ -79,7 +79,7 @@ This is never reached. // Error: 22 expected keyword `from` #import afrom, "b", c -// Error: 8 expected import items +// Error: 9 expected import items #import from "target.typ" // Error: 9-10 expected expression, found assignment operator @@ -114,4 +114,5 @@ This is never reached. // An item after a star. // Should output `, a from "target.typ"`. 
// Error: 10 expected keyword `from` +// Error: 10 expected semicolon or line break #import *, a from "target.typ" diff --git a/tests/typ/code/spread.typ b/tests/typ/code/spread.typ index 8a9491d06..41e790a41 100644 --- a/tests/typ/code/spread.typ +++ b/tests/typ/code/spread.typ @@ -62,7 +62,7 @@ #min(.."nope") --- -// Error: 10-14 expected identifier +// Error: 8-14 expected identifier #let f(..true) = none --- @@ -70,9 +70,9 @@ #let f(..a, ..b) = none --- -// Error: 5-6 spreading is not allowed here +// Error: 3-6 spreading is not allowed here {(..x)} --- -// Error: 11-17 spreading is not allowed here +// Error: 9-17 spreading is not allowed here {(1, 2, ..(1, 2))} From 84d35efee38d137a77e368c50421ac24327371c6 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 31 Oct 2021 11:46:12 +0100 Subject: [PATCH 02/18] Less owning, more iterating --- src/eval/capture.rs | 16 +-- src/eval/mod.rs | 14 +-- src/eval/walk.rs | 4 +- src/parse/mod.rs | 275 ++++++++++++++++++++----------------------- src/parse/parser.rs | 73 ++++++------ src/parse/resolve.rs | 8 +- src/parse/tokens.rs | 16 ++- src/source.rs | 6 +- src/syntax/expr.rs | 173 +++++++++++++-------------- src/syntax/ident.rs | 11 +- src/syntax/markup.rs | 104 +++++++--------- src/syntax/mod.rs | 239 +++++++++++++++++++------------------ src/syntax/pretty.rs | 41 ++++--- 13 files changed, 476 insertions(+), 504 deletions(-) diff --git a/src/eval/capture.rs b/src/eval/capture.rs index baf597472..b71e1ac18 100644 --- a/src/eval/capture.rs +++ b/src/eval/capture.rs @@ -1,7 +1,7 @@ use std::rc::Rc; use super::{Scope, Scopes, Value}; -use crate::syntax::{ClosureParam, Expr, Imports, RedTicket}; +use crate::syntax::{ClosureParam, Expr, Imports, RedRef}; /// A visitor that captures variable slots. 
pub struct CapturesVisitor<'a> { @@ -20,12 +20,12 @@ impl<'a> CapturesVisitor<'a> { } } - pub fn visit(&mut self, node: RedTicket) { + pub fn visit(&mut self, node: RedRef) { let expr: Option = node.cast(); match expr.as_ref() { Some(Expr::Let(expr)) => { - self.visit(expr.init_ticket()); + self.visit(expr.init_ref()); let ident = expr.binding(); self.internal.def_mut(ident.as_str(), Value::None); } @@ -40,7 +40,7 @@ impl<'a> CapturesVisitor<'a> { } } } - self.visit(closure.body_ticket()); + self.visit(closure.body_ref()); } Some(Expr::For(forloop)) => { let pattern = forloop.pattern(); @@ -49,7 +49,7 @@ impl<'a> CapturesVisitor<'a> { if let Some(key) = pattern.key() { self.internal.def_mut(key.as_str(), Value::None); } - self.visit(forloop.body_ticket()); + self.visit(forloop.body_ref()); } Some(Expr::Import(import)) => { if let Imports::Idents(idents) = import.imports() { @@ -73,7 +73,7 @@ impl<'a> CapturesVisitor<'a> { Some(Expr::Block(_)) => { self.internal.enter(); - for child in node.own().children() { + for child in node.children() { self.visit(child); } self.internal.exit(); @@ -81,14 +81,14 @@ impl<'a> CapturesVisitor<'a> { Some(Expr::Template(_)) => { self.internal.enter(); - for child in node.own().children() { + for child in node.children() { self.visit(child); } self.internal.exit(); } _ => { - for child in node.own().children() { + for child in node.children() { self.visit(child); } } diff --git a/src/eval/mod.rs b/src/eval/mod.rs index 296e33808..8d31c1774 100644 --- a/src/eval/mod.rs +++ b/src/eval/mod.rs @@ -230,7 +230,7 @@ impl Eval for ArrayExpr { type Output = Array; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - self.items().iter().map(|expr| expr.eval(ctx)).collect() + self.items().map(|expr| expr.eval(ctx)).collect() } } @@ -239,7 +239,6 @@ impl Eval for DictExpr { fn eval(&self, ctx: &mut EvalContext) -> TypResult { self.items() - .iter() .map(|x| Ok(((&x.name().string).into(), x.expr().eval(ctx)?))) .collect() } @@ -268,7 +267,7 @@ 
impl Eval for BlockExpr { ctx.scopes.enter(); let mut output = Value::None; - for expr in &self.exprs() { + for expr in self.exprs() { let value = expr.eval(ctx)?; output = ops::join(output, value).at(expr.span())?; } @@ -387,9 +386,9 @@ impl Eval for CallArgs { type Output = Args; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - let mut items = Vec::with_capacity(self.items().len()); + let mut items = Vec::new(); - for arg in &self.items() { + for arg in self.items() { let span = arg.span(); match arg { CallArg::Pos(expr) => { @@ -454,11 +453,10 @@ impl Eval for ClosureExpr { }; let mut sink = None; - let params_src = self.params(); - let mut params = Vec::with_capacity(params_src.len()); + let mut params = Vec::new(); // Collect parameters and an optional sink parameter. - for param in ¶ms_src { + for param in self.params() { match param { ClosureParam::Pos(name) => { params.push((name.string.clone(), None)); diff --git a/src/eval/walk.rs b/src/eval/walk.rs index e4d7f61a1..b28f4fde7 100644 --- a/src/eval/walk.rs +++ b/src/eval/walk.rs @@ -69,7 +69,7 @@ impl Walk for RawNode { impl Walk for HeadingNode { fn walk(&self, ctx: &mut EvalContext) -> TypResult<()> { - let level = self.level().0; + let level = self.level(); let body = self.body().eval(ctx)?; ctx.template.parbreak(); @@ -99,7 +99,7 @@ impl Walk for ListNode { impl Walk for EnumNode { fn walk(&self, ctx: &mut EvalContext) -> TypResult<()> { let body = self.body().eval(ctx)?; - let label = format_str!("{}.", self.number().0.unwrap_or(1)); + let label = format_str!("{}.", self.number().unwrap_or(1)); walk_item(ctx, label, body); Ok(()) } diff --git a/src/parse/mod.rs b/src/parse/mod.rs index dc7691833..0425f8248 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -30,15 +30,14 @@ fn markup(p: &mut Parser) { /// Parse markup that stays right of the given column. 
fn markup_indented(p: &mut Parser, column: usize) { - // TODO this is broken p.eat_while(|t| match t { - NodeKind::Space(n) => n == 0, + NodeKind::Space(n) => *n == 0, NodeKind::LineComment | NodeKind::BlockComment => true, _ => false, }); markup_while(p, false, &mut |p| match p.peek() { - Some(NodeKind::Space(n)) if n >= 1 => p.column(p.next_end()) >= column, + Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.next_end()) >= column, _ => true, }) } @@ -64,125 +63,119 @@ where /// Parse a markup node. fn markup_node(p: &mut Parser, at_start: &mut bool) { - if let Some(token) = p.peek() { - match token { - // Whitespace. - NodeKind::Space(newlines) => { - *at_start |= newlines > 0; + let token = match p.peek() { + Some(t) => t, + None => return, + }; - if newlines < 2 { - p.eat(); - } else { - p.convert(NodeKind::Parbreak); - } - } - - // Text. - NodeKind::UnicodeEscape(u) => { - if !u.terminated { - p.convert(NodeKind::Error( - ErrorPosition::End, - "expected closing brace".into(), - )); - p.unsuccessful(); - return; - } - - if u.character.is_none() { - let src = p.peek_src(); - p.convert(NodeKind::Error( - ErrorPosition::Full, - "invalid unicode escape sequence".into(), - )); - p.start(); - p.end(NodeKind::Text(src.into())); - return; - } + match token { + // Whitespace. + NodeKind::Space(newlines) => { + *at_start |= *newlines > 0; + if *newlines < 2 { p.eat(); + } else { + p.convert(NodeKind::Parbreak); } - NodeKind::Raw(r) => { - if !r.terminated { - p.convert(NodeKind::Error( - ErrorPosition::End, - "expected backtick(s)".into(), - )); - p.unsuccessful(); - return; - } + } - p.eat(); - } - NodeKind::Text(_) - | NodeKind::EnDash - | NodeKind::EmDash - | NodeKind::NonBreakingSpace => { - p.eat(); + // Text and markup. 
+ NodeKind::Text(_) + | NodeKind::EnDash + | NodeKind::EmDash + | NodeKind::NonBreakingSpace + | NodeKind::Emph + | NodeKind::Strong + | NodeKind::Linebreak => p.eat(), + + NodeKind::UnicodeEscape(u) => { + if !u.terminated { + p.convert(NodeKind::Error( + ErrorPosition::End, + "expected closing brace".into(), + )); + p.unsuccessful(); + return; } - // Markup. - NodeKind::Emph | NodeKind::Strong | NodeKind::Linebreak => { - p.eat(); + if u.character.is_none() { + let src = p.peek_src(); + p.convert(NodeKind::Error( + ErrorPosition::Full, + "invalid unicode escape sequence".into(), + )); + p.start(); + p.end(NodeKind::Text(src.into())); + return; } - NodeKind::Eq if *at_start => heading(p), - NodeKind::ListBullet if *at_start => list_node(p), - NodeKind::EnumNumbering(_) if *at_start => enum_node(p), - - // Line-based markup that is not currently at the start of the line. - NodeKind::Eq | NodeKind::ListBullet | NodeKind::EnumNumbering(_) => { - p.convert(NodeKind::Text(p.peek_src().into())) + p.eat(); + } + NodeKind::Raw(r) => { + if !r.terminated { + p.convert(NodeKind::Error( + ErrorPosition::End, + "expected backtick(s)".into(), + )); + p.unsuccessful(); + return; } - // Hashtag + keyword / identifier. - NodeKind::Ident(_) - | NodeKind::Let - | NodeKind::If - | NodeKind::While - | NodeKind::For - | NodeKind::Import - | NodeKind::Include => { - let stmt = matches!(token, NodeKind::Let | NodeKind::Import); - let group = if stmt { Group::Stmt } else { Group::Expr }; + p.eat(); + } - p.start_group(group, TokenMode::Code); - expr_with(p, true, 0); - if stmt && p.success() && !p.eof() { - p.expected_at("semicolon or line break"); - } - p.end_group(); - } + NodeKind::Eq if *at_start => heading(p), + NodeKind::ListBullet if *at_start => list_node(p), + NodeKind::EnumNumbering(_) if *at_start => enum_node(p), - // Block and template. 
- NodeKind::LeftBrace => { - block(p); - } - NodeKind::LeftBracket => { - template(p); - } + // Line-based markup that is not currently at the start of the line. + NodeKind::Eq | NodeKind::ListBullet | NodeKind::EnumNumbering(_) => { + p.convert(NodeKind::Text(p.peek_src().into())) + } - // Comments. - NodeKind::LineComment | NodeKind::BlockComment => { - p.eat(); - } + // Hashtag + keyword / identifier. + NodeKind::Ident(_) + | NodeKind::Let + | NodeKind::If + | NodeKind::While + | NodeKind::For + | NodeKind::Import + | NodeKind::Include => { + let stmt = matches!(token, NodeKind::Let | NodeKind::Import); + let group = if stmt { Group::Stmt } else { Group::Expr }; - _ => { - *at_start = false; - p.unexpected(); + p.start_group(group, TokenMode::Code); + expr_with(p, true, 0); + if stmt && p.success() && !p.eof() { + p.expected_at("semicolon or line break"); } - }; - } + p.end_group(); + } + + // Block and template. + NodeKind::LeftBrace => block(p), + NodeKind::LeftBracket => template(p), + + // Comments. + NodeKind::LineComment | NodeKind::BlockComment => p.eat(), + + _ => { + *at_start = false; + p.unexpected(); + } + }; } /// Parse a heading. fn heading(p: &mut Parser) { p.start(); p.start(); - p.eat_assert(NodeKind::Eq); + p.eat_assert(&NodeKind::Eq); // Count depth. let mut level: usize = 1; - while p.eat_if(NodeKind::Eq) { + while p.eat_if(&NodeKind::Eq) { level += 1; } @@ -200,7 +193,7 @@ fn heading(p: &mut Parser) { /// Parse a single list item. fn list_node(p: &mut Parser) { p.start(); - p.eat_assert(NodeKind::ListBullet); + p.eat_assert(&NodeKind::ListBullet); let column = p.column(p.prev_end()); markup_indented(p, column); p.end(NodeKind::List); @@ -209,9 +202,7 @@ fn list_node(p: &mut Parser) { /// Parse a single enum item. 
fn enum_node(p: &mut Parser) { p.start(); - if !matches!(p.eat(), Some(NodeKind::EnumNumbering(_))) { - panic!("enum item does not start with numbering") - }; + p.eat(); let column = p.column(p.prev_end()); markup_indented(p, column); p.end(NodeKind::Enum); @@ -263,7 +254,7 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) { continue; } - if p.peek() == Some(NodeKind::With) { + if p.peek() == Some(&NodeKind::With) { with_expr(p, p.child_count() - offset); if p.may_lift_abort() { @@ -276,7 +267,7 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) { break; } - let op = match p.peek().as_ref().and_then(BinOp::from_token) { + let op = match p.peek().and_then(BinOp::from_token) { Some(binop) => binop, None => { p.lift(); @@ -286,10 +277,8 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) { let mut prec = op.precedence(); if prec < min_prec { - { - p.lift(); - break; - }; + p.lift(); + break; } p.eat(); @@ -324,7 +313,7 @@ fn primary(p: &mut Parser, atomic: bool) { p.eat(); // Arrow means this is a closure's lone parameter. - if !atomic && p.peek() == Some(NodeKind::Arrow) { + if !atomic && p.peek() == Some(&NodeKind::Arrow) { p.end_and_start_with(NodeKind::ClosureParams); p.eat(); @@ -359,10 +348,9 @@ fn primary(p: &mut Parser, atomic: bool) { /// Parse a literal. 
fn literal(p: &mut Parser) -> bool { - let peeked = if let Some(p) = p.peek() { - p - } else { - return false; + let peeked = match p.peek() { + Some(x) => x.clone(), + None => return false, }; match peeked { @@ -375,18 +363,14 @@ fn literal(p: &mut Parser) -> bool { | NodeKind::Fraction(_) | NodeKind::Length(_, _) | NodeKind::Angle(_, _) - | NodeKind::Percentage(_) => { - p.eat(); - } + | NodeKind::Percentage(_) => p.eat(), NodeKind::Str(s) => { p.eat(); if !s.terminated { p.expected_at("quote"); } } - _ => { - return false; - } + _ => return false, } true @@ -401,7 +385,7 @@ fn parenthesized(p: &mut Parser) { let offset = p.child_count(); p.start(); p.start_group(Group::Paren, TokenMode::Code); - let colon = p.eat_if(NodeKind::Colon); + let colon = p.eat_if(&NodeKind::Colon); let kind = collection(p).0; p.end_group(); let token_count = p.child_count() - offset; @@ -414,12 +398,12 @@ fn parenthesized(p: &mut Parser) { } // Arrow means this is a closure's parameter list. - if p.peek() == Some(NodeKind::Arrow) { + if p.peek() == Some(&NodeKind::Arrow) { p.start_with(token_count); params(p, 0, true); p.end(NodeKind::ClosureParams); - p.eat_assert(NodeKind::Arrow); + p.eat_assert(&NodeKind::Arrow); expr(p); @@ -485,7 +469,7 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { break; } - if p.eat_if(NodeKind::Comma) { + if p.eat_if(&NodeKind::Comma) { has_comma = true; } else { missing_coma = Some(p.child_count()); @@ -518,7 +502,7 @@ enum CollectionItemKind { /// Parse an expression or a named pair. Returns if this is a named pair. 
fn item(p: &mut Parser) -> CollectionItemKind { p.start(); - if p.eat_if(NodeKind::Dots) { + if p.eat_if(&NodeKind::Dots) { expr(p); p.end_or_abort(NodeKind::ParameterSink); @@ -531,7 +515,7 @@ fn item(p: &mut Parser) -> CollectionItemKind { return CollectionItemKind::Unnamed; } - if p.eat_if(NodeKind::Colon) { + if p.eat_if(&NodeKind::Colon) { let child = p.child(1).unwrap(); if matches!(child.kind(), &NodeKind::Ident(_)) { expr(p); @@ -686,9 +670,9 @@ fn args(p: &mut Parser, allow_template: bool) { /// Parse a with expression. fn with_expr(p: &mut Parser, preserve: usize) { p.start_with(preserve); - p.eat_assert(NodeKind::With); + p.eat_assert(&NodeKind::With); - if p.peek() == Some(NodeKind::LeftParen) { + if p.peek() == Some(&NodeKind::LeftParen) { args(p, false); p.end(NodeKind::WithExpr); } else { @@ -700,7 +684,7 @@ fn with_expr(p: &mut Parser, preserve: usize) { /// Parse a let expression. fn let_expr(p: &mut Parser) { p.start(); - p.eat_assert(NodeKind::Let); + p.eat_assert(&NodeKind::Let); let offset = p.child_count(); ident(p); @@ -708,7 +692,7 @@ fn let_expr(p: &mut Parser) { return; } - if p.peek() == Some(NodeKind::With) { + if p.peek() == Some(&NodeKind::With) { with_expr(p, p.child_count() - offset); } else { // If a parenthesis follows, this is a function definition. @@ -725,7 +709,7 @@ fn let_expr(p: &mut Parser) { false }; - if p.eat_if(NodeKind::Eq) { + if p.eat_if(&NodeKind::Eq) { expr(p); } else if has_params { // Function definitions must have a body. @@ -749,7 +733,7 @@ fn let_expr(p: &mut Parser) { /// Parse an if expresion. 
fn if_expr(p: &mut Parser) { p.start(); - p.eat_assert(NodeKind::If); + p.eat_assert(&NodeKind::If); expr(p); if p.may_end_abort(NodeKind::IfExpr) { @@ -762,8 +746,8 @@ fn if_expr(p: &mut Parser) { return; } - if p.eat_if(NodeKind::Else) { - if p.peek() == Some(NodeKind::If) { + if p.eat_if(&NodeKind::Else) { + if p.peek() == Some(&NodeKind::If) { if_expr(p); } else { body(p); @@ -776,7 +760,7 @@ fn if_expr(p: &mut Parser) { /// Parse a while expresion. fn while_expr(p: &mut Parser) { p.start(); - p.eat_assert(NodeKind::While); + p.eat_assert(&NodeKind::While); expr(p); @@ -793,7 +777,7 @@ fn while_expr(p: &mut Parser) { /// Parse a for expression. fn for_expr(p: &mut Parser) { p.start(); - p.eat_assert(NodeKind::For); + p.eat_assert(&NodeKind::For); for_pattern(p); @@ -801,7 +785,7 @@ fn for_expr(p: &mut Parser) { return; } - if p.eat_expect(NodeKind::In) { + if p.eat_expect(&NodeKind::In) { expr(p); if p.may_end_abort(NodeKind::ForExpr) { @@ -828,7 +812,7 @@ fn for_pattern(p: &mut Parser) { return; } - if p.peek() == Some(NodeKind::Comma) { + if p.peek() == Some(&NodeKind::Comma) { p.eat(); ident(p); @@ -844,9 +828,9 @@ fn for_pattern(p: &mut Parser) { /// Parse an import expression. fn import_expr(p: &mut Parser) { p.start(); - p.eat_assert(NodeKind::Import); + p.eat_assert(&NodeKind::Import); - if !p.eat_if(NodeKind::Star) { + if !p.eat_if(&NodeKind::Star) { // This is the list of identifiers scenario. p.start(); p.start_group(Group::Imports, TokenMode::Code); @@ -865,7 +849,7 @@ fn import_expr(p: &mut Parser) { p.end(NodeKind::ImportItems); }; - if p.eat_expect(NodeKind::From) { + if p.eat_expect(&NodeKind::From) { expr(p); } @@ -875,7 +859,7 @@ fn import_expr(p: &mut Parser) { /// Parse an include expression. fn include_expr(p: &mut Parser) { p.start(); - p.eat_assert(NodeKind::Include); + p.eat_assert(&NodeKind::Include); expr(p); p.end(NodeKind::IncludeExpr); @@ -883,11 +867,12 @@ fn include_expr(p: &mut Parser) { /// Parse an identifier. 
fn ident(p: &mut Parser) { - if let Some(NodeKind::Ident(_)) = p.peek() { - p.eat(); - } else { - p.expected("identifier"); - p.unsuccessful(); + match p.peek() { + Some(NodeKind::Ident(_)) => p.eat(), + _ => { + p.expected("identifier"); + p.unsuccessful(); + } } } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index f62e882af..e6fcc1aed 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -161,7 +161,7 @@ impl<'s> Parser<'s> { let len = children.iter().map(|c| c.len()).sum(); self.children - .push(GreenNode::with_children(kind, len, children.into_iter()).into()); + .push(GreenNode::with_children(kind, len, children).into()); self.children.extend(remains); self.success = true; } @@ -240,10 +240,9 @@ impl<'s> Parser<'s> { } pub fn finish(&mut self) -> Rc { - if let Green::Node(n) = self.children.pop().unwrap() { - n - } else { - panic!() + match self.children.pop().unwrap() { + Green::Node(n) => n, + _ => panic!(), } } @@ -252,16 +251,16 @@ impl<'s> Parser<'s> { self.peek().is_none() } - pub fn eat(&mut self) -> Option { - let token = self.peek()?; - self.bump(); + fn eat_peeked(&mut self) -> Option { + let token = self.peek()?.clone(); + self.eat(); Some(token) } /// Consume the next token if it is the given one. - pub fn eat_if(&mut self, t: NodeKind) -> bool { + pub fn eat_if(&mut self, t: &NodeKind) -> bool { if self.peek() == Some(t) { - self.bump(); + self.eat(); true } else { false @@ -271,36 +270,36 @@ impl<'s> Parser<'s> { /// Consume the next token if the closure maps it a to `Some`-variant. pub fn eat_map(&mut self, f: F) -> Option where - F: FnOnce(NodeKind) -> Option, + F: FnOnce(&NodeKind) -> Option, { let token = self.peek()?; let mapped = f(token); if mapped.is_some() { - self.bump(); + self.eat(); } mapped } /// Consume the next token if it is the given one and produce an error if /// not. 
- pub fn eat_expect(&mut self, t: NodeKind) -> bool { - let eaten = self.eat_if(t.clone()); + pub fn eat_expect(&mut self, t: &NodeKind) -> bool { + let eaten = self.eat_if(t); if !eaten { - self.expected_at(&t.to_string()); + self.expected_at(t.as_str()); } eaten } /// Consume the next token, debug-asserting that it is one of the given ones. - pub fn eat_assert(&mut self, t: NodeKind) { - let next = self.eat(); - debug_assert_eq!(next, Some(t)); + pub fn eat_assert(&mut self, t: &NodeKind) { + let next = self.eat_peeked(); + debug_assert_eq!(next.as_ref(), Some(t)); } /// Consume tokens while the condition is true. pub fn eat_while(&mut self, mut f: F) where - F: FnMut(NodeKind) -> bool, + F: FnMut(&NodeKind) -> bool, { while self.peek().map_or(false, |t| f(t)) { self.eat(); @@ -308,8 +307,8 @@ impl<'s> Parser<'s> { } /// Peek at the next token without consuming it. - pub fn peek(&self) -> Option { - self.peeked.clone() + pub fn peek(&self) -> Option<&NodeKind> { + self.peeked.as_ref() } /// Peek at the next token if it follows immediately after the last one @@ -371,9 +370,9 @@ impl<'s> Parser<'s> { self.repeek(); match kind { - Group::Paren => self.eat_assert(NodeKind::LeftParen), - Group::Bracket => self.eat_assert(NodeKind::LeftBracket), - Group::Brace => self.eat_assert(NodeKind::LeftBrace), + Group::Paren => self.eat_assert(&NodeKind::LeftParen), + Group::Bracket => self.eat_assert(&NodeKind::LeftBracket), + Group::Brace => self.eat_assert(&NodeKind::LeftBrace), Group::Stmt => {} Group::Expr => {} Group::Imports => {} @@ -402,11 +401,11 @@ impl<'s> Parser<'s> { } { if self.next == Some(end.clone()) { // Bump the delimeter and return. No need to rescan in this case. 
- self.bump(); + self.eat(); rescan = false; } else if required { self.start(); - self.abort(format!("expected {}", end.to_string())); + self.abort(format!("expected {}", end)); } } @@ -457,21 +456,21 @@ impl<'s> Parser<'s> { /// Eat the next token and add an error that it is not the expected `thing`. pub fn expected(&mut self, what: &str) { self.start(); - if let Some(found) = self.eat() { - self.abort(format!("expected {}, found {}", what, found.to_string())) - } else { - self.lift(); - self.expected_at(what); + match self.eat_peeked() { + Some(found) => self.abort(format!("expected {}, found {}", what, found)), + None => { + self.lift(); + self.expected_at(what); + } } } /// Eat the next token and add an error that it is unexpected. pub fn unexpected(&mut self) { self.start(); - if let Some(found) = self.eat() { - self.abort(format!("unexpected {}", found.to_string())) - } else { - self.abort("unexpected end of file") + match self.eat_peeked() { + Some(found) => self.abort(format!("unexpected {}", found)), + None => self.abort("unexpected end of file"), } } @@ -489,7 +488,7 @@ impl<'s> Parser<'s> { } /// Move to the next token. - fn bump(&mut self) { + pub fn eat(&mut self) { self.children.push( GreenData::new( self.next.clone().unwrap(), @@ -511,7 +510,7 @@ impl<'s> Parser<'s> { if self.tokens.mode() == TokenMode::Code { // Skip whitespace and comments. while self.next.as_ref().map_or(false, |x| self.skip_type(x)) { - self.bump(); + self.eat(); } } diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index c59c3bb17..1b3089a63 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -25,11 +25,9 @@ pub fn resolve_string(string: &str) -> EcoString { let sequence = s.eat_while(|c| c.is_ascii_hexdigit()); let _terminated = s.eat_if('}'); - if let Some(c) = resolve_hex(sequence) { - out.push(c); - } else { - // TODO: Feedback that unicode escape sequence is wrong. 
- out.push_str(s.eaten_from(start)); + match resolve_hex(sequence) { + Some(c) => out.push(c), + None => out.push_str(s.eaten_from(start)), } } diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 19d0d77bb..bfd9f3ed1 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -224,8 +224,8 @@ impl<'s> Tokens<'s> { } fn backslash(&mut self) -> NodeKind { - if let Some(c) = self.s.peek() { - match c { + match self.s.peek() { + Some(c) => match c { // Backslash and comments. '\\' | '/' | // Parenthesis and hashtag. @@ -247,9 +247,8 @@ impl<'s> Tokens<'s> { } c if c.is_whitespace() => NodeKind::Linebreak, _ => NodeKind::Text("\\".into()), - } - } else { - NodeKind::Linebreak + }, + None => NodeKind::Linebreak, } } @@ -257,10 +256,9 @@ impl<'s> Tokens<'s> { fn hash(&mut self) -> NodeKind { if self.s.check_or(false, is_id_start) { let read = self.s.eat_while(is_id_continue); - if let Some(keyword) = keyword(read) { - keyword - } else { - NodeKind::Ident(read.into()) + match keyword(read) { + Some(keyword) => keyword, + None => NodeKind::Ident(read.into()), } } else { NodeKind::Text("#".into()) diff --git a/src/source.rs b/src/source.rs index e33e146c0..e3803f575 100644 --- a/src/source.rs +++ b/src/source.rs @@ -148,10 +148,10 @@ impl SourceFile { } pub fn ast(&self) -> TypResult { - let res = RedNode::new_root(self.root.clone(), self.id); - let errors = res.errors(); + let red = RedNode::new_root(self.root.clone(), self.id); + let errors = red.errors(); if errors.is_empty() { - Ok(res.ticket().cast().unwrap()) + Ok(red.as_ref().cast().unwrap()) } else { Err(Box::new( errors.into_iter().map(|(span, msg)| Error::new(span, msg)).collect(), diff --git a/src/syntax/expr.rs b/src/syntax/expr.rs index d0d0c62fe..8562a3a49 100644 --- a/src/syntax/expr.rs +++ b/src/syntax/expr.rs @@ -1,4 +1,4 @@ -use super::{Ident, Markup, NodeKind, RedNode, RedTicket, Span, TypedNode}; +use super::{Ident, Markup, NodeKind, RedNode, RedRef, Span, TypedNode}; use 
crate::geom::{AngularUnit, LengthUnit}; use crate::node; use crate::util::EcoString; @@ -85,7 +85,7 @@ impl Expr { } impl TypedNode for Expr { - fn cast_from(node: RedTicket) -> Option { + fn cast_from(node: RedRef) -> Option { match node.kind() { NodeKind::Ident(_) => Some(Self::Ident(Ident::cast_from(node).unwrap())), NodeKind::Array => Some(Self::Array(ArrayExpr::cast_from(node).unwrap())), @@ -146,18 +146,18 @@ pub enum Lit { } impl TypedNode for Lit { - fn cast_from(node: RedTicket) -> Option { + fn cast_from(node: RedRef) -> Option { match node.kind() { - NodeKind::None => Some(Self::None(node.own().span())), - NodeKind::Auto => Some(Self::Auto(node.own().span())), - NodeKind::Bool(b) => Some(Self::Bool(node.own().span(), *b)), - NodeKind::Int(i) => Some(Self::Int(node.own().span(), *i)), - NodeKind::Float(f) => Some(Self::Float(node.own().span(), *f)), - NodeKind::Length(f, unit) => Some(Self::Length(node.own().span(), *f, *unit)), - NodeKind::Angle(f, unit) => Some(Self::Angle(node.own().span(), *f, *unit)), - NodeKind::Percentage(f) => Some(Self::Percent(node.own().span(), *f)), - NodeKind::Fraction(f) => Some(Self::Fractional(node.own().span(), *f)), - NodeKind::Str(s) => Some(Self::Str(node.own().span(), s.string.clone())), + NodeKind::None => Some(Self::None(node.span())), + NodeKind::Auto => Some(Self::Auto(node.span())), + NodeKind::Bool(b) => Some(Self::Bool(node.span(), *b)), + NodeKind::Int(i) => Some(Self::Int(node.span(), *i)), + NodeKind::Float(f) => Some(Self::Float(node.span(), *f)), + NodeKind::Length(f, unit) => Some(Self::Length(node.span(), *f, *unit)), + NodeKind::Angle(f, unit) => Some(Self::Angle(node.span(), *f, *unit)), + NodeKind::Percentage(f) => Some(Self::Percent(node.span(), *f)), + NodeKind::Fraction(f) => Some(Self::Fractional(node.span(), *f)), + NodeKind::Str(s) => Some(Self::Str(node.span(), s.string.clone())), _ => None, } } @@ -180,34 +180,34 @@ impl Lit { } } -node!( +node! { /// An array expression: `(1, "hi", 12cm)`. 
Array => ArrayExpr -); +} impl ArrayExpr { /// The array items. - pub fn items(&self) -> Vec { - self.0.children().filter_map(RedTicket::cast).collect() + pub fn items<'a>(&'a self) -> impl Iterator + 'a { + self.0.children().filter_map(RedRef::cast) } } -node!( +node! { /// A dictionary expression: `(thickness: 3pt, pattern: dashed)`. Dict => DictExpr -); +} impl DictExpr { /// The named dictionary items. - pub fn items(&self) -> Vec { - self.0.children().filter_map(RedTicket::cast).collect() + pub fn items<'a>(&'a self) -> impl Iterator + 'a { + self.0.children().filter_map(RedRef::cast) } } -node!( +node! { /// A pair of a name and an expression: `pattern: dashed`. Named -); +} impl Named { /// The name: `pattern`. @@ -219,16 +219,16 @@ impl Named { pub fn expr(&self) -> Expr { self.0 .children() - .filter_map(RedTicket::cast) + .filter_map(RedRef::cast) .nth(1) .expect("named pair is missing expression") } } -node!( +node! { /// A template expression: `[*Hi* there!]`. Template => TemplateExpr -); +} impl TemplateExpr { /// The contents of the template. @@ -239,10 +239,10 @@ impl TemplateExpr { } } -node!( +node! { /// A grouped expression: `(1 + 2)`. Group => GroupExpr -); +} impl GroupExpr { /// The wrapped expression. @@ -253,22 +253,22 @@ impl GroupExpr { } } -node!( +node! { /// A block expression: `{ let x = 1; x + 2 }`. Block => BlockExpr -); +} impl BlockExpr { /// The list of expressions contained in the block. - pub fn exprs(&self) -> Vec { - self.0.children().filter_map(RedTicket::cast).collect() + pub fn exprs<'a>(&'a self) -> impl Iterator + 'a { + self.0.children().filter_map(RedRef::cast) } } -node!( +node! { /// A unary operation: `-x`. Unary => UnaryExpr -); +} impl UnaryExpr { /// The operator: `-`. @@ -298,7 +298,7 @@ pub enum UnOp { } impl TypedNode for UnOp { - fn cast_from(node: RedTicket) -> Option { + fn cast_from(node: RedRef) -> Option { Self::from_token(node.kind()) } } @@ -332,10 +332,10 @@ impl UnOp { } } -node!( +node! 
{ /// A binary operation: `a + b`. Binary => BinaryExpr -); +} impl BinaryExpr { /// The binary operator: `+`. @@ -356,7 +356,7 @@ impl BinaryExpr { pub fn rhs(&self) -> Expr { self.0 .children() - .filter_map(RedTicket::cast) + .filter_map(RedRef::cast) .nth(1) .expect("binary expression is missing right-hand side") } @@ -402,7 +402,7 @@ pub enum BinOp { } impl TypedNode for BinOp { - fn cast_from(node: RedTicket) -> Option { + fn cast_from(node: RedRef) -> Option { Self::from_token(node.kind()) } } @@ -504,10 +504,10 @@ pub enum Associativity { Right, } -node!( +node! { /// An invocation of a function: `foo(...)`. Call => CallExpr -); +} impl CallExpr { /// The function to call. @@ -523,15 +523,15 @@ impl CallExpr { } } -node!( +node! { /// The arguments to a function: `12, draw: false`. CallArgs -); +} impl CallArgs { /// The positional and named arguments. - pub fn items(&self) -> Vec { - self.0.children().filter_map(RedTicket::cast).collect() + pub fn items<'a>(&'a self) -> impl Iterator + 'a { + self.0.children().filter_map(RedRef::cast) } } @@ -547,14 +547,13 @@ pub enum CallArg { } impl TypedNode for CallArg { - fn cast_from(node: RedTicket) -> Option { + fn cast_from(node: RedRef) -> Option { match node.kind() { NodeKind::Named => Some(CallArg::Named( node.cast().expect("named call argument is missing name"), )), NodeKind::ParameterSink => Some(CallArg::Spread( - node.own() - .cast_first_child() + node.cast_first_child() .expect("call argument sink is missing expression"), )), _ => Some(CallArg::Pos(node.cast()?)), @@ -573,10 +572,10 @@ impl CallArg { } } -node!( +node! { /// A closure expression: `(x, y) => z`. Closure => ClosureExpr -); +} impl ClosureExpr { /// The name of the closure. @@ -589,15 +588,13 @@ impl ClosureExpr { } /// The parameter bindings. 
- pub fn params(&self) -> Vec { + pub fn params<'a>(&'a self) -> impl Iterator + 'a { self.0 .children() .find(|x| x.kind() == &NodeKind::ClosureParams) .expect("closure is missing parameter list") - .own() .children() - .filter_map(RedTicket::cast) - .collect() + .filter_map(RedRef::cast) } /// The body of the closure. @@ -607,8 +604,8 @@ impl ClosureExpr { self.0.cast_last_child().expect("closure is missing body") } - /// The ticket of the body of the closure. - pub fn body_ticket(&self) -> RedTicket { + /// The red node reference of the body of the closure. + pub fn body_ref(&self) -> RedRef { self.0 .children() .filter(|x| x.cast::().is_some()) @@ -629,17 +626,16 @@ pub enum ClosureParam { } impl TypedNode for ClosureParam { - fn cast_from(node: RedTicket) -> Option { + fn cast_from(node: RedRef) -> Option { match node.kind() { NodeKind::Ident(i) => { - Some(ClosureParam::Pos(Ident::new(i, node.own().span()).unwrap())) + Some(ClosureParam::Pos(Ident::new(i, node.span()).unwrap())) } NodeKind::Named => Some(ClosureParam::Named( node.cast().expect("named closure parameter is missing name"), )), NodeKind::ParameterSink => Some(ClosureParam::Sink( - node.own() - .cast_first_child() + node.cast_first_child() .expect("closure parameter sink is missing identifier"), )), _ => Some(ClosureParam::Pos(node.cast()?)), @@ -647,10 +643,10 @@ impl TypedNode for ClosureParam { } } -node!( +node! { /// A with expression: `f with (x, y: 1)`. WithExpr -); +} impl WithExpr { /// The function to apply the arguments to. @@ -668,10 +664,10 @@ impl WithExpr { } } -node!( +node! { /// A let expression: `let x = 1`. LetExpr -); +} impl LetExpr { /// The binding to assign to. @@ -693,7 +689,7 @@ impl LetExpr { /// The expression the binding is initialized with. 
pub fn init(&self) -> Option { if self.0.cast_first_child::().is_some() { - self.0.children().filter_map(RedTicket::cast).nth(1) + self.0.children().filter_map(RedRef::cast).nth(1) } else { Some( self.0 @@ -703,8 +699,9 @@ impl LetExpr { } } - /// The ticket for the expression the binding is initialized with. - pub fn init_ticket(&self) -> RedTicket { + /// The red node reference for the expression the binding is initialized + /// with. + pub fn init_ref(&self) -> RedRef { if self.0.cast_first_child::().is_some() { self.0.children().filter(|x| x.cast::().is_some()).nth(1) } else { @@ -714,10 +711,10 @@ impl LetExpr { } } -node!( +node! { /// An import expression: `import a, b, c from "utils.typ"`. ImportExpr -); +} impl ImportExpr { /// The items to be imported. @@ -745,11 +742,11 @@ pub enum Imports { } impl TypedNode for Imports { - fn cast_from(node: RedTicket) -> Option { + fn cast_from(node: RedRef) -> Option { match node.kind() { NodeKind::Star => Some(Imports::Wildcard), NodeKind::ImportItems => { - let idents = node.own().children().filter_map(RedTicket::cast).collect(); + let idents = node.children().filter_map(RedRef::cast).collect(); Some(Imports::Idents(idents)) } _ => None, @@ -757,10 +754,10 @@ impl TypedNode for Imports { } } -node!( +node! { /// An include expression: `include "chapter1.typ"`. IncludeExpr -); +} impl IncludeExpr { /// The location of the file to be included. @@ -771,10 +768,10 @@ impl IncludeExpr { } } -node!( +node! { /// An if-else expression: `if x { y } else { z }`. IfExpr -); +} impl IfExpr { /// The condition which selects the body to evaluate. @@ -788,21 +785,21 @@ impl IfExpr { pub fn if_body(&self) -> Expr { self.0 .children() - .filter_map(RedTicket::cast) + .filter_map(RedRef::cast) .nth(1) .expect("if expression is missing if body") } /// The expression to evaluate if the condition is false. 
pub fn else_body(&self) -> Option { - self.0.children().filter_map(RedTicket::cast).nth(2) + self.0.children().filter_map(RedRef::cast).nth(2) } } -node!( +node! { /// A while loop expression: `while x { y }`. WhileExpr -); +} impl WhileExpr { /// The condition which selects whether to evaluate the body. @@ -816,16 +813,16 @@ impl WhileExpr { pub fn body(&self) -> Expr { self.0 .children() - .filter_map(RedTicket::cast) + .filter_map(RedRef::cast) .nth(1) .expect("while loop expression is missing body") } } -node!( +node! { /// A for loop expression: `for x in y { z }`. ForExpr -); +} impl ForExpr { /// The pattern to assign to. @@ -846,13 +843,13 @@ impl ForExpr { pub fn body(&self) -> Expr { self.0 .children() - .filter_map(RedTicket::cast) + .filter_map(RedRef::cast) .last() .expect("for loop expression is missing body") } - /// The ticket for the expression to evaluate for each iteration. - pub fn body_ticket(&self) -> RedTicket { + /// The red node reference for the expression to evaluate for each iteration. + pub fn body_ref(&self) -> RedRef { self.0 .children() .filter(|x| x.cast::().is_some()) @@ -861,14 +858,14 @@ impl ForExpr { } } -node!( +node! { /// A for-in loop expression: `for x in y { z }`. ForPattern -); +} impl ForPattern { pub fn key(&self) -> Option { - let mut items: Vec<_> = self.0.children().filter_map(RedTicket::cast).collect(); + let mut items: Vec<_> = self.0.children().filter_map(RedRef::cast).collect(); if items.len() > 1 { Some(items.remove(0)) } else { None } } diff --git a/src/syntax/ident.rs b/src/syntax/ident.rs index 2c61329d1..f5cc63300 100644 --- a/src/syntax/ident.rs +++ b/src/syntax/ident.rs @@ -3,7 +3,7 @@ use std::ops::Deref; use unicode_xid::UnicodeXID; -use super::{NodeKind, RedTicket, Span, TypedNode}; +use super::{NodeKind, RedRef, Span, TypedNode}; use crate::util::EcoString; /// An unicode identifier with a few extra permissible characters. 
@@ -67,11 +67,10 @@ impl From<&Ident> for EcoString { } impl TypedNode for Ident { - fn cast_from(node: RedTicket) -> Option { - if let NodeKind::Ident(i) = node.kind() { - Some(Ident::new(i, node.own().span()).unwrap()) - } else { - None + fn cast_from(node: RedRef) -> Option { + match node.kind() { + NodeKind::Ident(i) => Some(Ident::new(i, node.span()).unwrap()), + _ => None, } } } diff --git a/src/syntax/markup.rs b/src/syntax/markup.rs index c12c0e819..de547f769 100644 --- a/src/syntax/markup.rs +++ b/src/syntax/markup.rs @@ -1,4 +1,4 @@ -use super::{Expr, Ident, NodeKind, RedNode, RedTicket, Span, TypedNode}; +use super::{Expr, Ident, NodeKind, RedNode, RedRef, Span, TypedNode}; use crate::node; use crate::util::EcoString; use std::fmt::Write; @@ -7,12 +7,12 @@ use std::fmt::Write; pub type Markup = Vec; impl TypedNode for Markup { - fn cast_from(node: RedTicket) -> Option { + fn cast_from(node: RedRef) -> Option { if node.kind() != &NodeKind::Markup { return None; } - let children = node.own().children().filter_map(TypedNode::cast_from).collect(); + let children = node.children().filter_map(TypedNode::cast_from).collect(); Some(children) } } @@ -45,7 +45,7 @@ pub enum MarkupNode { } impl TypedNode for MarkupNode { - fn cast_from(node: RedTicket) -> Option { + fn cast_from(node: RedRef) -> Option { match node.kind() { NodeKind::Space(_) => Some(MarkupNode::Space), NodeKind::Linebreak => Some(MarkupNode::Linebreak), @@ -53,15 +53,14 @@ impl TypedNode for MarkupNode { NodeKind::Strong => Some(MarkupNode::Strong), NodeKind::Emph => Some(MarkupNode::Emph), NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())), - NodeKind::UnicodeEscape(u) => { - Some(MarkupNode::Text(if let Some(s) = u.character { - s.into() - } else { + NodeKind::UnicodeEscape(u) => Some(MarkupNode::Text(match u.character { + Some(c) => c.into(), + None => { let mut eco = EcoString::with_capacity(u.sequence.len() + 4); write!(&mut eco, "\\u{{{}}}", u.sequence).unwrap(); eco - })) - } + } + })), 
NodeKind::EnDash => Some(MarkupNode::Text(EcoString::from("\u{2013}"))), NodeKind::EmDash => Some(MarkupNode::Text(EcoString::from("\u{2014}"))), NodeKind::NonBreakingSpace => { @@ -93,28 +92,29 @@ pub struct RawNode { } impl TypedNode for RawNode { - fn cast_from(node: RedTicket) -> Option { - if let NodeKind::Raw(raw) = node.kind() { - let span = node.own().span(); - let start = span.start + raw.backticks as usize; - Some(Self { - block: raw.block, - lang: raw.lang.as_ref().and_then(|x| { - let span = Span::new(span.source, start, start + x.len()); - Ident::new(x, span) - }), - text: raw.text.clone(), - }) - } else { - None + fn cast_from(node: RedRef) -> Option { + match node.kind() { + NodeKind::Raw(raw) => { + let span = node.span(); + let start = span.start + raw.backticks as usize; + Some(Self { + block: raw.block, + lang: raw.lang.as_ref().and_then(|x| { + let span = Span::new(span.source, start, start + x.len()); + Ident::new(x, span) + }), + text: raw.text.clone(), + }) + } + _ => None, } } } -node!( +node! { /// A section heading: `= Introduction`. Heading => HeadingNode -); +} impl HeadingNode { /// The contents of the heading. @@ -125,30 +125,21 @@ impl HeadingNode { } /// The section depth (numer of equals signs). - pub fn level(&self) -> HeadingLevel { + pub fn level(&self) -> u8 { self.0 - .cast_first_child() + .children() + .find_map(|node| match node.kind() { + NodeKind::HeadingLevel(heading) => Some(*heading), + _ => None, + }) .expect("heading node is missing heading level") } } -#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub struct HeadingLevel(pub usize); - -impl TypedNode for HeadingLevel { - fn cast_from(node: RedTicket) -> Option { - if let NodeKind::HeadingLevel(l) = node.kind() { - Some(Self((*l).into())) - } else { - None - } - } -} - -node!( +node! { /// An item in an unordered list: `- ...`. List => ListNode -); +} impl ListNode { /// The contents of the list item. 
@@ -157,10 +148,10 @@ impl ListNode { } } -node!( +node! { /// An item in an enumeration (ordered list): `1. ...`. Enum => EnumNode -); +} impl EnumNode { /// The contents of the list item. @@ -169,20 +160,13 @@ impl EnumNode { } /// The number, if any. - pub fn number(&self) -> EnumNumber { - self.0.cast_first_child().expect("enumeration node is missing number") - } -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub struct EnumNumber(pub Option); - -impl TypedNode for EnumNumber { - fn cast_from(node: RedTicket) -> Option { - if let NodeKind::EnumNumbering(x) = node.kind() { - Some(Self(*x)) - } else { - None - } + pub fn number(&self) -> Option { + self.0 + .children() + .find_map(|node| match node.kind() { + NodeKind::EnumNumbering(num) => Some(num.clone()), + _ => None, + }) + .expect("enumeration node is missing number") } } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 88757f8ea..8e04a569f 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -160,8 +160,6 @@ pub enum NodeKind { /// /// The comment can contain nested block comments. BlockComment, - /// A node that should never appear in a finished tree. - Never, /// Tokens that appear in the wrong place. Error(ErrorPosition, EcoString), /// Template markup. 
@@ -246,7 +244,41 @@ pub enum ErrorPosition { impl Display for NodeKind { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.pad(match self { + f.pad(self.as_str()) + } +} + +impl NodeKind { + pub fn is_parenthesis(&self) -> bool { + match self { + Self::LeftParen => true, + Self::RightParen => true, + _ => false, + } + } + + pub fn is_bracket(&self) -> bool { + match self { + Self::LeftBracket => true, + Self::RightBracket => true, + _ => false, + } + } + + pub fn is_brace(&self) -> bool { + match self { + Self::LeftBrace => true, + Self::RightBrace => true, + _ => false, + } + } + + pub fn is_error(&self) -> bool { + matches!(self, NodeKind::Error(_, _)) + } + + pub fn as_str(&self) -> &'static str { + match self { Self::LeftBracket => "opening bracket", Self::RightBracket => "closing bracket", Self::LeftBrace => "opening brace", @@ -296,7 +328,6 @@ impl Display for NodeKind { Self::Math(_) => "math formula", Self::EnumNumbering(_) => "numbering", Self::Str(_) => "string", - Self::Never => "a node that should not be here", Self::LineComment => "line comment", Self::BlockComment => "block comment", Self::Markup => "markup", @@ -348,45 +379,15 @@ impl Display for NodeKind { "*/" => "end of block comment", _ => "invalid token", }, - }) - } -} - -impl NodeKind { - pub fn is_parenthesis(&self) -> bool { - match self { - Self::LeftParen => true, - Self::RightParen => true, - _ => false, } } - - pub fn is_bracket(&self) -> bool { - match self { - Self::LeftBracket => true, - Self::RightBracket => true, - _ => false, - } - } - - pub fn is_brace(&self) -> bool { - match self { - Self::LeftBrace => true, - Self::RightBrace => true, - _ => false, - } - } - - pub fn is_error(&self) -> bool { - matches!(self, NodeKind::Never | NodeKind::Error(_, _)) - } } /// A syntactical node. #[derive(Clone, PartialEq)] pub struct GreenNode { /// Node metadata. - meta: GreenData, + data: GreenData, /// This node's children, losslessly make up this node. 
children: Vec, } @@ -400,12 +401,12 @@ pub struct GreenData { /// The byte length of the node in the source. len: usize, /// Whether this node or any of its children are erroneous. - has_error: bool, + erroneous: bool, } impl GreenData { pub fn new(kind: NodeKind, len: usize) -> Self { - Self { len, has_error: kind.is_error(), kind } + Self { len, erroneous: kind.is_error(), kind } } pub fn kind(&self) -> &NodeKind { @@ -416,8 +417,8 @@ impl GreenData { self.len } - pub fn has_error(&self) -> bool { - self.has_error + pub fn erroneous(&self) -> bool { + self.erroneous } } @@ -437,23 +438,23 @@ pub enum Green { } impl Green { - fn meta(&self) -> &GreenData { + fn data(&self) -> &GreenData { match self { Green::Token(t) => &t, - Green::Node(n) => &n.meta, + Green::Node(n) => &n.data, } } pub fn kind(&self) -> &NodeKind { - self.meta().kind() + self.data().kind() } pub fn len(&self) -> usize { - self.meta().len() + self.data().len() } - pub fn has_error(&self) -> bool { - self.meta().has_error() + pub fn erroneous(&self) -> bool { + self.data().erroneous() } pub fn children(&self) -> &[Green] { @@ -467,29 +468,19 @@ impl Green { impl GreenNode { pub fn new(kind: NodeKind, len: usize) -> Self { Self { - meta: GreenData::new(kind, len), + data: GreenData::new(kind, len), children: Vec::new(), } } - pub fn with_children( - kind: NodeKind, - len: usize, - children: impl Iterator>, - ) -> Self { + pub fn with_children(kind: NodeKind, len: usize, children: Vec) -> Self { let mut meta = GreenData::new(kind, len); - let children = children - .map(|x| { - let x = x.into(); - meta.has_error |= x.has_error(); - x - }) - .collect(); - Self { meta, children } + meta.erroneous |= children.iter().any(|c| c.erroneous()); + Self { data: meta, children } } pub fn with_child(kind: NodeKind, len: usize, child: impl Into) -> Self { - Self::with_children(kind, len, std::iter::once(child.into())) + Self::with_children(kind, len, vec![child.into()]) } pub fn children(&self) -> &[Green] { @@ 
-511,7 +502,7 @@ impl From> for Green { impl Default for Green { fn default() -> Self { - Self::Token(GreenData::new(NodeKind::Never, 0)) + Self::Token(GreenData::new(NodeKind::None, 0)) } } @@ -530,13 +521,13 @@ impl Debug for Green { } #[derive(Copy, Clone, PartialEq)] -pub struct RedTicket<'a> { +pub struct RedRef<'a> { id: SourceId, offset: usize, green: &'a Green, } -impl<'a> RedTicket<'a> { +impl<'a> RedRef<'a> { pub fn own(self) -> RedNode { RedNode { id: self.id, @@ -549,6 +540,9 @@ impl<'a> RedTicket<'a> { self.green.kind() } + pub fn span(&self) -> Span { + Span::new(self.id, self.offset, self.offset + self.green.len()) + } pub fn cast(self) -> Option where @@ -556,6 +550,37 @@ impl<'a> RedTicket<'a> { { T::cast_from(self) } + + pub fn erroneous(&self) -> bool { + self.green.erroneous() + } + + pub fn children(self) -> impl Iterator> + Clone { + let children = match &self.green { + Green::Node(node) => node.children(), + Green::Token(_) => &[], + }; + + let mut offset = self.offset; + children.iter().map(move |green| { + let child_offset = offset; + offset += green.len(); + RedRef { id: self.id, offset: child_offset, green } + }) + } + + pub(crate) fn typed_child(&self, kind: &NodeKind) -> Option { + self.children() + .find(|x| mem::discriminant(x.kind()) == mem::discriminant(kind)) + } + + pub(crate) fn cast_first_child(&self) -> Option { + self.children().find_map(RedRef::cast) + } + + pub(crate) fn cast_last_child(&self) -> Option { + self.children().filter_map(RedRef::cast).last() + } } #[derive(Clone, PartialEq)] @@ -571,7 +596,7 @@ impl RedNode { } pub fn span(&self) -> Span { - Span::new(self.id, self.offset, self.offset + self.green.len()) + self.as_ref().span() } pub fn len(&self) -> usize { @@ -582,53 +607,36 @@ impl RedNode { self.green.kind() } - pub fn children<'a>(&'a self) -> impl Iterator> + Clone + 'a { - let children = match &self.green { - Green::Node(node) => node.children(), - Green::Token(_) => &[], - }; - - let mut offset = 
self.offset; - children.iter().map(move |green_child| { - let child_offset = offset; - offset += green_child.len(); - RedTicket { - id: self.id, - offset: child_offset, - green: &green_child, - } - }) - } - - pub fn has_error(&self) -> bool { - self.green.has_error() + pub fn children<'a>(&'a self) -> impl Iterator> + Clone { + self.as_ref().children() } pub fn errors(&self) -> Vec<(Span, EcoString)> { - if !self.green.has_error() { + if !self.green.erroneous() { return vec![]; } - if let NodeKind::Error(pos, msg) = self.kind() { - let span = match pos { - ErrorPosition::Start => self.span().at_start(), - ErrorPosition::Full => self.span(), - ErrorPosition::End => self.span().at_end(), - }; + match self.kind() { + NodeKind::Error(pos, msg) => { + let span = match pos { + ErrorPosition::Start => self.span().at_start(), + ErrorPosition::Full => self.span(), + ErrorPosition::End => self.span().at_end(), + }; - vec![(span, msg.clone())] - } else if let NodeKind::Never = self.kind() { - vec![(self.span(), "found a never node".into())] - } else { - self.children() - .filter(|ticket| ticket.green.has_error()) - .flat_map(|ticket| ticket.own().errors()) - .collect() + vec![(span, msg.clone())] + } + _ => self + .as_ref() + .children() + .filter(|red| red.green.erroneous()) + .flat_map(|red| red.own().errors()) + .collect(), } } - pub fn ticket<'a>(&'a self) -> RedTicket<'a> { - RedTicket { + pub fn as_ref<'a>(&'a self) -> RedRef<'a> { + RedRef { id: self.id, offset: self.offset, green: &self.green, @@ -636,28 +644,26 @@ impl RedNode { } pub(crate) fn typed_child(&self, kind: &NodeKind) -> Option { - self.children() - .find(|x| mem::discriminant(x.kind()) == mem::discriminant(kind)) - .map(RedTicket::own) + self.as_ref().typed_child(kind).map(RedRef::own) } pub(crate) fn cast_first_child(&self) -> Option { - self.children().find_map(RedTicket::cast) + self.as_ref().cast_first_child() } pub(crate) fn cast_last_child(&self) -> Option { - 
self.children().filter_map(RedTicket::cast).last() + self.as_ref().cast_last_child() } } impl Debug for RedNode { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "{:?}: {:?}", self.kind(), self.span())?; - let children = self.children().collect::>(); + let children = self.as_ref().children().collect::>(); if !children.is_empty() { f.write_str(" ")?; f.debug_list() - .entries(children.into_iter().map(RedTicket::own)) + .entries(children.into_iter().map(RedRef::own)) .finish()?; } Ok(()) @@ -666,21 +672,22 @@ impl Debug for RedNode { pub trait TypedNode: Sized { /// Performs the conversion. - fn cast_from(value: RedTicket) -> Option; + fn cast_from(value: RedRef) -> Option; } #[macro_export] macro_rules! node { - (#[doc = $doc:expr] $name:ident) => { - node!(#[doc = $doc] $name => $name); + ($(#[$attr:meta])* $name:ident) => { + node!{$(#[$attr])* $name => $name} }; - (#[doc = $doc:expr] $variant:ident => $name:ident) => { - #[doc = $doc] + ($(#[$attr:meta])* $variant:ident => $name:ident) => { #[derive(Debug, Clone, PartialEq)] + #[repr(transparent)] + $(#[$attr])* pub struct $name(RedNode); impl TypedNode for $name { - fn cast_from(node: RedTicket) -> Option { + fn cast_from(node: RedRef) -> Option { if node.kind() != &NodeKind::$variant { return None; } @@ -694,8 +701,8 @@ macro_rules! node { self.0.span() } - pub fn underlying(&self) -> RedTicket { - self.0.ticket() + pub fn underlying(&self) -> RedRef { + self.0.as_ref() } } }; diff --git a/src/syntax/pretty.rs b/src/syntax/pretty.rs index b1c7e02bd..db364eaa1 100644 --- a/src/syntax/pretty.rs +++ b/src/syntax/pretty.rs @@ -46,20 +46,25 @@ impl Printer { Write::write_fmt(self, fmt) } - /// Write a list of items joined by a joiner. - pub fn join(&mut self, items: I, joiner: &str, mut write_item: F) + /// Write a list of items joined by a joiner and return how many there were. 
+ pub fn join(&mut self, items: I, joiner: &str, mut write_item: F) -> usize where I: IntoIterator, F: FnMut(T, &mut Self), { + let mut count = 0; let mut iter = items.into_iter(); if let Some(first) = iter.next() { write_item(first, self); + count += 1; } for item in iter { self.push_str(joiner); write_item(item, self); + count += 1; } + + count } /// Finish pretty printing and return the underlying buffer. @@ -165,7 +170,7 @@ impl Pretty for RawNode { impl Pretty for HeadingNode { fn pretty(&self, p: &mut Printer) { - for _ in 0 .. self.level().0 { + for _ in 0 .. self.level() { p.push('='); } p.push(' '); @@ -182,7 +187,7 @@ impl Pretty for ListNode { impl Pretty for EnumNode { fn pretty(&self, p: &mut Printer) { - if let Some(number) = self.number().0 { + if let Some(number) = self.number() { write!(p, "{}", number).unwrap(); } p.push_str(". "); @@ -237,8 +242,8 @@ impl Pretty for ArrayExpr { p.push('('); let items = self.items(); - p.join(&items, ", ", |item, p| item.pretty(p)); - if items.len() == 1 { + let len = p.join(items, ", ", |item, p| item.pretty(p)); + if len == 1 { p.push(','); } p.push(')'); @@ -249,11 +254,11 @@ impl Pretty for DictExpr { fn pretty(&self, p: &mut Printer) { p.push('('); - let items = self.items(); - if items.is_empty() { + let mut items = self.items().peekable(); + if items.peek().is_none() { p.push(':'); } else { - p.join(&items, ", ", |named, p| named.pretty(p)); + p.join(items, ", ", |named, p| named.pretty(p)); } p.push(')'); } @@ -287,7 +292,7 @@ impl Pretty for BlockExpr { fn pretty(&self, p: &mut Printer) { p.push('{'); - let exprs = self.exprs(); + let exprs: Vec<_> = self.exprs().collect(); if exprs.len() > 1 { p.push(' '); } @@ -342,8 +347,7 @@ impl Pretty for CallExpr { p.push(')'); }; - let arg_list = self.args(); - let args = arg_list.items(); + let args: Vec<_> = self.args().items().collect(); if let Some(Expr::Template(template)) = args .last() @@ -361,7 +365,7 @@ impl Pretty for CallExpr { impl Pretty for CallArgs 
{ fn pretty(&self, p: &mut Printer) { - p.join(&self.items(), ", ", |item, p| item.pretty(p)); + p.join(self.items(), ", ", |item, p| item.pretty(p)); } } @@ -380,11 +384,12 @@ impl Pretty for CallArg { impl Pretty for ClosureExpr { fn pretty(&self, p: &mut Printer) { - if let [param] = self.params().as_slice() { + let params: Vec<_> = self.params().collect(); + if let [param] = params.as_slice() { param.pretty(p); } else { p.push('('); - p.join(self.params().iter(), ", ", |item, p| item.pretty(p)); + p.join(params.iter(), ", ", |item, p| item.pretty(p)); p.push(')'); } p.push_str(" => "); @@ -420,7 +425,7 @@ impl Pretty for LetExpr { self.binding().pretty(p); if let Some(Expr::Closure(closure)) = &self.init() { p.push('('); - p.join(closure.params().iter(), ", ", |item, p| item.pretty(p)); + p.join(closure.params(), ", ", |item, p| item.pretty(p)); p.push_str(") = "); closure.body().pretty(p); } else if let Some(init) = &self.init() { @@ -487,7 +492,9 @@ impl Pretty for Imports { fn pretty(&self, p: &mut Printer) { match self { Self::Wildcard => p.push('*'), - Self::Idents(idents) => p.join(idents, ", ", |item, p| item.pretty(p)), + Self::Idents(idents) => { + p.join(idents, ", ", |item, p| item.pretty(p)); + } } } } From 1c0ac793d2b9c403f1a8fa60a3748f4ff8623acb Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 31 Oct 2021 15:01:39 +0100 Subject: [PATCH 03/18] Slim `NodeKind` memory footprint --- src/eval/mod.rs | 3 +- src/parse/mod.rs | 39 +++------ src/parse/resolve.rs | 11 +-- src/parse/tokens.rs | 184 ++++++++++++++++++++++++++------------- src/syntax/mod.rs | 6 +- src/syntax/token.rs | 22 +---- tests/typ/markup/raw.typ | 2 +- 7 files changed, 142 insertions(+), 125 deletions(-) diff --git a/src/eval/mod.rs b/src/eval/mod.rs index 8d31c1774..ba266ea58 100644 --- a/src/eval/mod.rs +++ b/src/eval/mod.rs @@ -30,7 +30,6 @@ use std::collections::HashMap; use std::io; use std::mem; use std::path::PathBuf; -use std::rc::Rc; use crate::diag::{At, Error, StrResult, 
Trace, Tracepoint, TypResult}; use crate::geom::{Angle, Fractional, Length, Relative}; @@ -475,7 +474,7 @@ impl Eval for ClosureExpr { // Clone the body expression so that we don't have a lifetime // dependence on the AST. - let body = Rc::new(self.body()); + let body = self.body().clone(); // Define the actual function. let func = Function::new(name, move |ctx, args| { diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 0425f8248..773f642c8 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -87,18 +87,10 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { | NodeKind::NonBreakingSpace | NodeKind::Emph | NodeKind::Strong - | NodeKind::Linebreak => p.eat(), + | NodeKind::Linebreak + | NodeKind::Raw(_) => p.eat(), NodeKind::UnicodeEscape(u) => { - if !u.terminated { - p.convert(NodeKind::Error( - ErrorPosition::End, - "expected closing brace".into(), - )); - p.unsuccessful(); - return; - } - if u.character.is_none() { let src = p.peek_src(); p.convert(NodeKind::Error( @@ -112,18 +104,6 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { p.eat(); } - NodeKind::Raw(r) => { - if !r.terminated { - p.convert(NodeKind::Error( - ErrorPosition::End, - "expected backtick(s)".into(), - )); - p.unsuccessful(); - return; - } - - p.eat(); - } NodeKind::Eq if *at_start => heading(p), NodeKind::ListBullet if *at_start => list_node(p), @@ -159,6 +139,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { // Comments. NodeKind::LineComment | NodeKind::BlockComment => p.eat(), + NodeKind::Error(t, e) if t != &ErrorPosition::Full || e.contains(' ') => p.eat(), _ => { *at_start = false; @@ -338,6 +319,10 @@ fn primary(p: &mut Parser, atomic: bool) { Some(NodeKind::Import) => import_expr(p), Some(NodeKind::Include) => include_expr(p), + Some(NodeKind::Error(t, e)) if t != &ErrorPosition::Full || e.contains(' ') => { + p.eat(); + } + // Nothing. 
_ => { p.expected("expression"); @@ -363,13 +348,9 @@ fn literal(p: &mut Parser) -> bool { | NodeKind::Fraction(_) | NodeKind::Length(_, _) | NodeKind::Angle(_, _) - | NodeKind::Percentage(_) => p.eat(), - NodeKind::Str(s) => { - p.eat(); - if !s.terminated { - p.expected_at("quote"); - } - } + | NodeKind::Percentage(_) + | NodeKind::Str(_) => p.eat(), + _ => return false, } diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index 1b3089a63..8d4c04d49 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -46,12 +46,7 @@ pub fn resolve_hex(sequence: &str) -> Option { } /// Resolve the language tag and trims the raw text. -pub fn resolve_raw( - column: usize, - backticks: u8, - text: &str, - terminated: bool, -) -> RawToken { +pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawToken { if backticks > 1 { let (tag, inner) = split_at_lang_tag(text); let (text, block) = trim_and_split_raw(column, inner); @@ -59,7 +54,6 @@ pub fn resolve_raw( lang: Some(tag.into()), text: text.into(), backticks, - terminated, block, } } else { @@ -67,7 +61,6 @@ pub fn resolve_raw( lang: None, text: split_lines(text).join("\n").into(), backticks, - terminated, block: false, } } @@ -194,7 +187,7 @@ mod tests { text: &str, block: bool, ) { - let node = resolve_raw(column, backticks, raw, true); + let node = resolve_raw(column, backticks, raw); assert_eq!(node.lang.as_deref(), lang); assert_eq!(node.text, text); assert_eq!(node.block, block); diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index bfd9f3ed1..8a480b02c 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -5,6 +5,8 @@ use crate::source::SourceFile; use crate::syntax::*; use crate::util::EcoString; +use std::rc::Rc; + /// An iterator over the tokens of a string of source code. 
pub struct Tokens<'s> { source: &'s SourceFile, @@ -239,11 +241,18 @@ impl<'s> Tokens<'s> { self.s.eat_assert('u'); self.s.eat_assert('{'); let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into(); - NodeKind::UnicodeEscape(UnicodeEscapeToken { - character: resolve_hex(&sequence), - sequence, - terminated: self.s.eat_if('}') - }) + + if self.s.eat_if('}') { + NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken { + character: resolve_hex(&sequence), + sequence, + })) + } else { + NodeKind::Error( + ErrorPosition::End, + "expected closing brace".into(), + ) + } } c if c.is_whitespace() => NodeKind::Linebreak, _ => NodeKind::Text("\\".into()), @@ -307,13 +316,12 @@ impl<'s> Tokens<'s> { // Special case for empty inline block. if backticks == 2 { - return NodeKind::Raw(RawToken { + return NodeKind::Raw(Rc::new(RawToken { text: EcoString::new(), lang: None, backticks: 1, - terminated: true, block: false, - }); + })); } let start = self.s.index(); @@ -330,12 +338,26 @@ impl<'s> Tokens<'s> { let terminated = found == backticks; let end = self.s.index() - if terminated { found as usize } else { 0 }; - NodeKind::Raw(resolve_raw( - column, - backticks, - self.s.get(start .. end).into(), - terminated, - )) + if terminated { + NodeKind::Raw(Rc::new(resolve_raw( + column, + backticks, + self.s.get(start .. end).into(), + ))) + } else { + let remaining = backticks - found; + let noun = if remaining == 1 { "backtick" } else { "backticks" }; + + NodeKind::Error( + ErrorPosition::End, + if found == 0 { + format!("expected {} {}", remaining, noun) + } else { + format!("expected {} more {}", remaining, noun) + } + .into(), + ) + } } fn math(&mut self) -> NodeKind { @@ -368,11 +390,22 @@ impl<'s> Tokens<'s> { (true, true) => 2, }; - NodeKind::Math(MathToken { - formula: self.s.get(start .. end).into(), - display, - terminated, - }) + if terminated { + NodeKind::Math(Rc::new(MathToken { + formula: self.s.get(start .. 
end).into(), + display, + })) + } else { + NodeKind::Error( + ErrorPosition::End, + if display { + "expected closing dollar sign" + } else { + "expected display math closure sequence" + } + .into(), + ) + } } fn ident(&mut self, start: usize) -> NodeKind { @@ -444,17 +477,19 @@ impl<'s> Tokens<'s> { fn string(&mut self) -> NodeKind { let mut escaped = false; - NodeKind::Str(StrToken { - string: resolve_string(self.s.eat_until(|c| { - if c == '"' && !escaped { - true - } else { - escaped = c == '\\' && !escaped; - false - } - })), - terminated: self.s.eat_if('"'), - }) + let string = resolve_string(self.s.eat_until(|c| { + if c == '"' && !escaped { + true + } else { + escaped = c == '\\' && !escaped; + false + } + })); + if self.s.eat_if('"') { + NodeKind::Str(StrToken { string }) + } else { + NodeKind::Error(ErrorPosition::End, "expected quote".into()) + } } fn line_comment(&mut self) -> NodeKind { @@ -526,39 +561,68 @@ mod tests { use TokenMode::{Code, Markup}; fn UnicodeEscape(sequence: &str, terminated: bool) -> NodeKind { - NodeKind::UnicodeEscape(UnicodeEscapeToken { - character: resolve_hex(sequence), - sequence: sequence.into(), - terminated, - }) + if terminated { + NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken { + character: resolve_hex(sequence), + sequence: sequence.into(), + })) + } else { + NodeKind::Error(ErrorPosition::End, "expected closing brace".into()) + } } fn Raw( text: &str, lang: Option<&str>, - backticks: u8, - terminated: bool, + backticks_left: u8, + backticks_right: u8, block: bool, ) -> NodeKind { - NodeKind::Raw(RawToken { - text: text.into(), - lang: lang.map(Into::into), - backticks, - terminated, - block, - }) + if backticks_left == backticks_right { + NodeKind::Raw(Rc::new(RawToken { + text: text.into(), + lang: lang.map(Into::into), + backticks: backticks_left, + block, + })) + } else { + let remaining = backticks_left - backticks_right; + let noun = if remaining == 1 { "backtick" } else { "backticks" }; + + NodeKind::Error( + 
ErrorPosition::End, + if backticks_right == 0 { + format!("expected {} {}", remaining, noun) + } else { + format!("expected {} more {}", remaining, noun) + } + .into(), + ) + } } fn Math(formula: &str, display: bool, terminated: bool) -> NodeKind { - NodeKind::Math(MathToken { - formula: formula.into(), - display, - terminated, - }) + if terminated { + NodeKind::Math(Rc::new(MathToken { formula: formula.into(), display })) + } else { + NodeKind::Error( + ErrorPosition::End, + if display { + "expected closing dollar sign" + } else { + "expected display math closure sequence" + } + .into(), + ) + } } fn Str(string: &str, terminated: bool) -> NodeKind { - NodeKind::Str(StrToken { string: string.into(), terminated }) + if terminated { + NodeKind::Str(StrToken { string: string.into() }) + } else { + NodeKind::Error(ErrorPosition::End, "expected quote".into()) + } } fn Text(string: &str) -> NodeKind { @@ -844,22 +908,22 @@ mod tests { #[test] fn test_tokenize_raw_blocks() { // Test basic raw block. - t!(Markup: "``" => Raw("", None, 1, true, false)); - t!(Markup: "`raw`" => Raw("raw", None, 1, true, false)); - t!(Markup[""]: "`]" => Raw("]", None, 1, false, false)); + t!(Markup: "``" => Raw("", None, 1, 1, false)); + t!(Markup: "`raw`" => Raw("raw", None, 1, 1, false)); + t!(Markup[""]: "`]" => Raw("]", None, 1, 0, false)); // Test special symbols in raw block. - t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, true, false)); - t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, true, false), Raw(" ", None, 1, false, false)); + t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, 1, false)); + t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, 1, false), Raw(" ", None, 1, 0, false)); // Test separated closing backticks. - t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, true, false)); + t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, 3, false)); // Test more backticks. 
- t!(Markup: "``nope``" => Raw("", None, 1, true, false), Text("nope"), Raw("", None, 1, true, false)); - t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, true, false)); - t!(Markup[""]: "`````👩‍🚀````noend" => Raw("````noend", Some("👩‍🚀"), 5, false, false)); - t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, true, false), Raw("", None, 1, true, false)); + t!(Markup: "``nope``" => Raw("", None, 1, 1, false), Text("nope"), Raw("", None, 1, 1, false)); + t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, 4, false)); + t!(Markup[""]: "`````👩‍🚀````noend" => Raw("````noend", Some("👩‍🚀"), 5, 0, false)); + t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, 4, false), Raw("", None, 1, 1, false)); } #[test] diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 8e04a569f..ca5b6a1b6 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -121,12 +121,12 @@ pub enum NodeKind { Text(EcoString), /// A slash and the letter "u" followed by a hexadecimal unicode entity /// enclosed in curly braces: `\u{1F5FA}`. - UnicodeEscape(UnicodeEscapeToken), + UnicodeEscape(Rc), /// An arbitrary number of backticks followed by inner contents, terminated /// with the same number of backticks: `` `...` ``. - Raw(RawToken), + Raw(Rc), /// Dollar signs surrounding inner contents. - Math(MathToken), + Math(Rc), /// A numbering: `23.`. /// /// Can also exist without the number: `.`. diff --git a/src/syntax/token.rs b/src/syntax/token.rs index 49613667e..5a6214958 100644 --- a/src/syntax/token.rs +++ b/src/syntax/token.rs @@ -2,15 +2,10 @@ use crate::util::EcoString; /// A quoted string token: `"..."`. #[derive(Debug, Clone, PartialEq)] +#[repr(transparent)] pub struct StrToken { /// The string inside the quotes. - /// - /// _Note_: If the string contains escape sequences these are not yet - /// applied to be able to just store a string slice here instead of - /// a `String`. The resolving is done later in the parser. 
pub string: EcoString, - /// Whether the closing quote was present. - pub terminated: bool, } /// A raw block token: `` `...` ``. @@ -22,8 +17,6 @@ pub struct RawToken { pub lang: Option, /// The number of opening backticks. pub backticks: u8, - /// Whether all closing backticks were present. - pub terminated: bool, /// Whether to display this as a block. pub block: bool, } @@ -36,8 +29,6 @@ pub struct MathToken { /// Whether the formula is display-level, that is, it is surrounded by /// `$[..]`. pub display: bool, - /// Whether the closing dollars were present. - pub terminated: bool, } /// A unicode escape sequence token: `\u{1F5FA}`. @@ -47,15 +38,4 @@ pub struct UnicodeEscapeToken { pub sequence: EcoString, /// The resulting unicode character. pub character: Option, - /// Whether the closing brace was present. - pub terminated: bool, -} - -/// A unit-bound number token: `1.2em`. -#[derive(Debug, Clone, PartialEq)] -pub struct UnitToken { - /// The number part. - pub number: std::ops::Range, - /// The unit part. - pub unit: std::ops::Range, } diff --git a/tests/typ/markup/raw.typ b/tests/typ/markup/raw.typ index d48432f73..0e053a9b3 100644 --- a/tests/typ/markup/raw.typ +++ b/tests/typ/markup/raw.typ @@ -55,5 +55,5 @@ The keyword ```rust let```. --- // Unterminated. 
-// Error: 2:1 expected backtick(s) +// Error: 2:1 expected 1 backtick `endless From c569e14c07902b23b7b3e29df4076cea1f4496cf Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 31 Oct 2021 16:22:33 +0100 Subject: [PATCH 04/18] Improve error handling --- benches/oneshot.rs | 10 ++++- src/eval/walk.rs | 2 +- src/parse/mod.rs | 87 +++++++++++++++--------------------- src/parse/tokens.rs | 102 +++++++++++++++++++------------------------ src/syntax/expr.rs | 44 ++++++++----------- src/syntax/markup.rs | 29 +++++------- src/syntax/mod.rs | 5 ++- src/syntax/pretty.rs | 2 +- 8 files changed, 122 insertions(+), 159 deletions(-) diff --git a/benches/oneshot.rs b/benches/oneshot.rs index 6ce81a639..9a57825d1 100644 --- a/benches/oneshot.rs +++ b/benches/oneshot.rs @@ -44,7 +44,13 @@ fn bench_scan(iai: &mut Iai) { } fn bench_tokenize(iai: &mut Iai) { - iai.run(|| Tokens::new(black_box(SRC), black_box(TokenMode::Markup)).count()); + iai.run(|| { + Tokens::new( + black_box(&SourceFile::detached(SRC)), + black_box(TokenMode::Markup), + ) + .count() + }); } fn bench_parse(iai: &mut Iai) { @@ -53,7 +59,7 @@ fn bench_parse(iai: &mut Iai) { fn bench_eval(iai: &mut Iai) { let (mut ctx, id) = context(); - let ast = ctx.parse(id).unwrap(); + let ast = ctx.sources.get(id).ast().unwrap(); iai.run(|| eval(&mut ctx, id, &ast).unwrap()); } diff --git a/src/eval/walk.rs b/src/eval/walk.rs index b28f4fde7..e4f8ac7b8 100644 --- a/src/eval/walk.rs +++ b/src/eval/walk.rs @@ -16,7 +16,7 @@ pub trait Walk { impl Walk for Markup { fn walk(&self, ctx: &mut EvalContext) -> TypResult<()> { - for node in self.iter() { + for node in self.nodes() { node.walk(ctx)?; } Ok(()) diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 773f642c8..ce992834c 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -138,8 +138,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { NodeKind::LeftBracket => template(p), // Comments. 
- NodeKind::LineComment | NodeKind::BlockComment => p.eat(), - NodeKind::Error(t, e) if t != &ErrorPosition::Full || e.contains(' ') => p.eat(), + NodeKind::LineComment | NodeKind::BlockComment | NodeKind::Error(_, _) => p.eat(), _ => { *at_start = false; @@ -319,7 +318,7 @@ fn primary(p: &mut Parser, atomic: bool) { Some(NodeKind::Import) => import_expr(p), Some(NodeKind::Include) => include_expr(p), - Some(NodeKind::Error(t, e)) if t != &ErrorPosition::Full || e.contains(' ') => { + Some(NodeKind::Error(_, _)) => { p.eat(); } @@ -333,28 +332,26 @@ fn primary(p: &mut Parser, atomic: bool) { /// Parse a literal. fn literal(p: &mut Parser) -> bool { - let peeked = match p.peek() { - Some(x) => x.clone(), - None => return false, - }; - - match peeked { + match p.peek() { // Basic values. - NodeKind::None - | NodeKind::Auto - | NodeKind::Int(_) - | NodeKind::Float(_) - | NodeKind::Bool(_) - | NodeKind::Fraction(_) - | NodeKind::Length(_, _) - | NodeKind::Angle(_, _) - | NodeKind::Percentage(_) - | NodeKind::Str(_) => p.eat(), + Some( + NodeKind::None + | NodeKind::Auto + | NodeKind::Int(_) + | NodeKind::Float(_) + | NodeKind::Bool(_) + | NodeKind::Fraction(_) + | NodeKind::Length(_, _) + | NodeKind::Angle(_, _) + | NodeKind::Percentage(_) + | NodeKind::Str(_), + ) => { + p.eat(); + true + } - _ => return false, + _ => false, } - - true } /// Parse something that starts with a parenthesis, which can be either of: @@ -395,11 +392,11 @@ fn parenthesized(p: &mut Parser) { // Find out which kind of collection this is. match kind { CollectionKind::Group => p.end(NodeKind::Group), - CollectionKind::PositionalCollection => { + CollectionKind::Positional => { p.lift(); array(p, token_count); } - CollectionKind::NamedCollection => { + CollectionKind::Named => { p.lift(); dict(p, token_count); } @@ -413,9 +410,9 @@ enum CollectionKind { Group, /// The collection starts with a positional and has more items or a trailing /// comma. 
- PositionalCollection, + Positional, /// The collection starts with a named item. - NamedCollection, + Named, } /// Parse a collection. @@ -424,20 +421,19 @@ enum CollectionKind { /// commas. fn collection(p: &mut Parser) -> (CollectionKind, usize) { let mut items = 0; - let mut kind = CollectionKind::PositionalCollection; - let mut seen_spread = false; + let mut kind = CollectionKind::Positional; let mut has_comma = false; let mut missing_coma = None; while !p.eof() { let item_kind = item(p); if p.success() { - if items == 0 && item_kind == CollectionItemKind::Named { - kind = CollectionKind::NamedCollection; + if items == 0 && item_kind == NodeKind::Named { + kind = CollectionKind::Named; } - if item_kind == CollectionItemKind::ParameterSink { - seen_spread = true; + if item_kind == NodeKind::ParameterSink { + has_comma = true; } items += 1; @@ -458,42 +454,27 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { } } - if !has_comma - && items == 1 - && !seen_spread - && kind == CollectionKind::PositionalCollection - { + if !has_comma && items == 1 && kind == CollectionKind::Positional { kind = CollectionKind::Group; } (kind, items) } -/// What kind of item is this? -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -enum CollectionItemKind { - /// A named item. - Named, - /// An unnamed item. - Unnamed, - /// A parameter sink. - ParameterSink, -} - /// Parse an expression or a named pair. Returns if this is a named pair. 
-fn item(p: &mut Parser) -> CollectionItemKind { +fn item(p: &mut Parser) -> NodeKind { p.start(); if p.eat_if(&NodeKind::Dots) { expr(p); p.end_or_abort(NodeKind::ParameterSink); - return CollectionItemKind::ParameterSink; + return NodeKind::ParameterSink; } expr(p); if p.may_lift_abort() { - return CollectionItemKind::Unnamed; + return NodeKind::None; } if p.eat_if(&NodeKind::Colon) { @@ -512,10 +493,10 @@ fn item(p: &mut Parser) -> CollectionItemKind { p.unsuccessful(); } - CollectionItemKind::Named + NodeKind::Named } else { p.lift(); - CollectionItemKind::Unnamed + p.last_child().unwrap().kind().clone() } } diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 8a480b02c..7c500ce79 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -91,7 +91,7 @@ impl<'s> Iterator for Tokens<'s> { '/' if self.s.eat_if('*') => self.block_comment(), '/' if !self.maybe_in_url() && self.s.eat_if('/') => self.line_comment(), '*' if self.s.eat_if('/') => { - NodeKind::Error(ErrorPosition::Full, self.s.eaten_from(start).into()) + NodeKind::Unknown(self.s.eaten_from(start).into()) } // Other things. @@ -173,7 +173,7 @@ impl<'s> Tokens<'s> { // Strings. 
'"' => self.string(), - _ => NodeKind::Error(ErrorPosition::Full, self.s.eaten_from(start).into()), + _ => NodeKind::Unknown(self.s.eaten_from(start).into()), } } @@ -398,10 +398,10 @@ impl<'s> Tokens<'s> { } else { NodeKind::Error( ErrorPosition::End, - if display { + if !display || (!escaped && dollar) { "expected closing dollar sign" } else { - "expected display math closure sequence" + "expected closing bracket and dollar sign" } .into(), ) @@ -466,11 +466,11 @@ impl<'s> Tokens<'s> { "deg" => NodeKind::Angle(f, AngularUnit::Deg), "rad" => NodeKind::Angle(f, AngularUnit::Rad), _ => { - return NodeKind::Error(ErrorPosition::Full, all.into()); + return NodeKind::Unknown(all.into()); } } } else { - NodeKind::Error(ErrorPosition::Full, all.into()) + NodeKind::Unknown(all.into()) } } @@ -575,45 +575,31 @@ mod tests { text: &str, lang: Option<&str>, backticks_left: u8, - backticks_right: u8, + err_msg: Option<&str>, block: bool, ) -> NodeKind { - if backticks_left == backticks_right { - NodeKind::Raw(Rc::new(RawToken { + match err_msg { + None => NodeKind::Raw(Rc::new(RawToken { text: text.into(), lang: lang.map(Into::into), backticks: backticks_left, block, - })) - } else { - let remaining = backticks_left - backticks_right; - let noun = if remaining == 1 { "backtick" } else { "backticks" }; - - NodeKind::Error( - ErrorPosition::End, - if backticks_right == 0 { - format!("expected {} {}", remaining, noun) - } else { - format!("expected {} more {}", remaining, noun) - } - .into(), - ) + })), + Some(msg) => { + NodeKind::Error(ErrorPosition::End, format!("expected {}", msg).into()) + } } } - fn Math(formula: &str, display: bool, terminated: bool) -> NodeKind { - if terminated { - NodeKind::Math(Rc::new(MathToken { formula: formula.into(), display })) - } else { - NodeKind::Error( + fn Math(formula: &str, display: bool, err_msg: Option<&str>) -> NodeKind { + match err_msg { + None => { + NodeKind::Math(Rc::new(MathToken { formula: formula.into(), display })) + } + 
Some(msg) => NodeKind::Error( ErrorPosition::End, - if display { - "expected closing dollar sign" - } else { - "expected display math closure sequence" - } - .into(), - ) + format!("expected closing {}", msg).into(), + ), } } @@ -634,7 +620,7 @@ mod tests { } fn Invalid(invalid: &str) -> NodeKind { - NodeKind::Error(ErrorPosition::Full, invalid.into()) + NodeKind::Unknown(invalid.into()) } /// Building blocks for suffix testing. @@ -687,7 +673,7 @@ mod tests { ('/', None, "//", LineComment), ('/', None, "/**/", BlockComment), ('/', Some(Markup), "*", Strong), - ('/', Some(Markup), "$ $", Math(" ", false, true)), + ('/', Some(Markup), "$ $", Math(" ", false, None)), ('/', Some(Markup), r"\\", Text("\\")), ('/', Some(Markup), "#let", Let), ('/', Some(Code), "(", LeftParen), @@ -908,42 +894,42 @@ mod tests { #[test] fn test_tokenize_raw_blocks() { // Test basic raw block. - t!(Markup: "``" => Raw("", None, 1, 1, false)); - t!(Markup: "`raw`" => Raw("raw", None, 1, 1, false)); - t!(Markup[""]: "`]" => Raw("]", None, 1, 0, false)); + t!(Markup: "``" => Raw("", None, 1, None, false)); + t!(Markup: "`raw`" => Raw("raw", None, 1, None, false)); + t!(Markup[""]: "`]" => Raw("]", None, 1, Some("1 backtick"), false)); // Test special symbols in raw block. - t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, 1, false)); - t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, 1, false), Raw(" ", None, 1, 0, false)); + t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, None, false)); + t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, None, false), Raw(" ", None, 1, Some("1 backtick"), false)); // Test separated closing backticks. - t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, 3, false)); + t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, None, false)); // Test more backticks. 
- t!(Markup: "``nope``" => Raw("", None, 1, 1, false), Text("nope"), Raw("", None, 1, 1, false)); - t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, 4, false)); - t!(Markup[""]: "`````👩‍🚀````noend" => Raw("````noend", Some("👩‍🚀"), 5, 0, false)); - t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, 4, false), Raw("", None, 1, 1, false)); + t!(Markup: "``nope``" => Raw("", None, 1, None, false), Text("nope"), Raw("", None, 1, None, false)); + t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, None, false)); + t!(Markup[""]: "`````👩‍🚀````noend" => Raw("````noend", Some("👩‍🚀"), 5, Some("5 backticks"), false)); + t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, None, false), Raw("", None, 1, None, false)); } #[test] fn test_tokenize_math_formulas() { // Test basic formula. - t!(Markup: "$$" => Math("", false, true)); - t!(Markup: "$x$" => Math("x", false, true)); - t!(Markup: r"$\\$" => Math(r"\\", false, true)); - t!(Markup: "$[x + y]$" => Math("x + y", true, true)); - t!(Markup: r"$[\\]$" => Math(r"\\", true, true)); + t!(Markup: "$$" => Math("", false, None)); + t!(Markup: "$x$" => Math("x", false, None)); + t!(Markup: r"$\\$" => Math(r"\\", false, None)); + t!(Markup: "$[x + y]$" => Math("x + y", true, None)); + t!(Markup: r"$[\\]$" => Math(r"\\", true, None)); // Test unterminated. - t!(Markup[""]: "$x" => Math("x", false, false)); - t!(Markup[""]: "$[x" => Math("x", true, false)); - t!(Markup[""]: "$[x]\n$" => Math("x]\n$", true, false)); + t!(Markup[""]: "$x" => Math("x", false, Some("dollar sign"))); + t!(Markup[""]: "$[x" => Math("x", true, Some("bracket and dollar sign"))); + t!(Markup[""]: "$[x]\n$" => Math("x]\n$", true, Some("bracket and dollar sign"))); // Test escape sequences. 
- t!(Markup: r"$\$x$" => Math(r"\$x", false, true)); - t!(Markup: r"$[\\\]$]$" => Math(r"\\\]$", true, true)); - t!(Markup[""]: r"$[ ]\\$" => Math(r" ]\\$", true, false)); + t!(Markup: r"$\$x$" => Math(r"\$x", false, None)); + t!(Markup: r"$[\\\]$]$" => Math(r"\\\]$", true, None)); + t!(Markup[""]: r"$[ ]\\$" => Math(r" ]\\$", true, Some("bracket and dollar sign"))); } #[test] diff --git a/src/syntax/expr.rs b/src/syntax/expr.rs index 8562a3a49..1439cbdb6 100644 --- a/src/syntax/expr.rs +++ b/src/syntax/expr.rs @@ -87,32 +87,24 @@ impl Expr { impl TypedNode for Expr { fn cast_from(node: RedRef) -> Option { match node.kind() { - NodeKind::Ident(_) => Some(Self::Ident(Ident::cast_from(node).unwrap())), - NodeKind::Array => Some(Self::Array(ArrayExpr::cast_from(node).unwrap())), - NodeKind::Dict => Some(Self::Dict(DictExpr::cast_from(node).unwrap())), - NodeKind::Template => { - Some(Self::Template(TemplateExpr::cast_from(node).unwrap())) - } - NodeKind::Group => Some(Self::Group(GroupExpr::cast_from(node).unwrap())), - NodeKind::Block => Some(Self::Block(BlockExpr::cast_from(node).unwrap())), - NodeKind::Unary => Some(Self::Unary(UnaryExpr::cast_from(node).unwrap())), - NodeKind::Binary => Some(Self::Binary(BinaryExpr::cast_from(node).unwrap())), - NodeKind::Call => Some(Self::Call(CallExpr::cast_from(node).unwrap())), - NodeKind::Closure => { - Some(Self::Closure(ClosureExpr::cast_from(node).unwrap())) - } - NodeKind::WithExpr => Some(Self::With(WithExpr::cast_from(node).unwrap())), - NodeKind::LetExpr => Some(Self::Let(LetExpr::cast_from(node).unwrap())), - NodeKind::IfExpr => Some(Self::If(IfExpr::cast_from(node).unwrap())), - NodeKind::WhileExpr => Some(Self::While(WhileExpr::cast_from(node).unwrap())), - NodeKind::ForExpr => Some(Self::For(ForExpr::cast_from(node).unwrap())), - NodeKind::ImportExpr => { - Some(Self::Import(ImportExpr::cast_from(node).unwrap())) - } - NodeKind::IncludeExpr => { - Some(Self::Include(IncludeExpr::cast_from(node).unwrap())) - } - _ 
=> Some(Self::Lit(Lit::cast_from(node)?)), + NodeKind::Ident(_) => node.cast().map(Self::Ident), + NodeKind::Array => node.cast().map(Self::Array), + NodeKind::Dict => node.cast().map(Self::Dict), + NodeKind::Template => node.cast().map(Self::Template), + NodeKind::Group => node.cast().map(Self::Group), + NodeKind::Block => node.cast().map(Self::Block), + NodeKind::Unary => node.cast().map(Self::Unary), + NodeKind::Binary => node.cast().map(Self::Binary), + NodeKind::Call => node.cast().map(Self::Call), + NodeKind::Closure => node.cast().map(Self::Closure), + NodeKind::WithExpr => node.cast().map(Self::With), + NodeKind::LetExpr => node.cast().map(Self::Let), + NodeKind::IfExpr => node.cast().map(Self::If), + NodeKind::WhileExpr => node.cast().map(Self::While), + NodeKind::ForExpr => node.cast().map(Self::For), + NodeKind::ImportExpr => node.cast().map(Self::Import), + NodeKind::IncludeExpr => node.cast().map(Self::Include), + _ => node.cast().map(Self::Lit), } } } diff --git a/src/syntax/markup.rs b/src/syntax/markup.rs index de547f769..49b2a519e 100644 --- a/src/syntax/markup.rs +++ b/src/syntax/markup.rs @@ -3,17 +3,14 @@ use crate::node; use crate::util::EcoString; use std::fmt::Write; -/// The syntactical root capable of representing a full parsed document. -pub type Markup = Vec; +node! { + /// The syntactical root capable of representing a full parsed document. 
+ Markup +} -impl TypedNode for Markup { - fn cast_from(node: RedRef) -> Option { - if node.kind() != &NodeKind::Markup { - return None; - } - - let children = node.children().filter_map(TypedNode::cast_from).collect(); - Some(children) +impl Markup { + pub fn nodes<'a>(&'a self) -> impl Iterator + 'a { + self.0.children().filter_map(RedRef::cast) } } @@ -66,14 +63,12 @@ impl TypedNode for MarkupNode { NodeKind::NonBreakingSpace => { Some(MarkupNode::Text(EcoString::from("\u{00A0}"))) } - NodeKind::Raw(_) => Some(MarkupNode::Raw(RawNode::cast_from(node).unwrap())), - NodeKind::Heading => { - Some(MarkupNode::Heading(HeadingNode::cast_from(node).unwrap())) - } - NodeKind::List => Some(MarkupNode::List(ListNode::cast_from(node).unwrap())), - NodeKind::Enum => Some(MarkupNode::Enum(EnumNode::cast_from(node).unwrap())), + NodeKind::Raw(_) => node.cast().map(MarkupNode::Raw), + NodeKind::Heading => node.cast().map(MarkupNode::Heading), + NodeKind::List => node.cast().map(MarkupNode::List), + NodeKind::Enum => node.cast().map(MarkupNode::Enum), NodeKind::Error(_, _) => None, - _ => Some(MarkupNode::Expr(Expr::cast_from(node)?)), + _ => node.cast().map(MarkupNode::Expr), } } } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index ca5b6a1b6..afa0ab86a 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -162,6 +162,8 @@ pub enum NodeKind { BlockComment, /// Tokens that appear in the wrong place. Error(ErrorPosition, EcoString), + /// Unknown character sequences. + Unknown(EcoString), /// Template markup. Markup, /// A forced line break: `\`. 
@@ -375,10 +377,11 @@ impl NodeKind { Self::ImportExpr => "import expression", Self::ImportItems => "import items", Self::IncludeExpr => "include expression", - Self::Error(_, src) => match src.as_str() { + Self::Unknown(src) => match src.as_str() { "*/" => "end of block comment", _ => "invalid token", }, + Self::Error(_, _) => "parse error", } } } diff --git a/src/syntax/pretty.rs b/src/syntax/pretty.rs index db364eaa1..da0bdd443 100644 --- a/src/syntax/pretty.rs +++ b/src/syntax/pretty.rs @@ -82,7 +82,7 @@ impl Write for Printer { impl Pretty for Markup { fn pretty(&self, p: &mut Printer) { - for node in self { + for node in self.nodes() { node.pretty(p); } } From 2e7d359e59a45849f53eea6e022ca83295f5a6e7 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 31 Oct 2021 18:52:48 +0100 Subject: [PATCH 05/18] Unicode escape error moved to tokenizer --- benches/oneshot.rs | 12 ++---- src/parse/mod.rs | 36 ++++++----------- src/parse/parser.rs | 24 ++++++++++-- src/parse/tokens.rs | 92 ++++++++++++++++++++------------------------ src/syntax/markup.rs | 10 +---- src/syntax/mod.rs | 2 +- src/syntax/token.rs | 5 +-- 7 files changed, 82 insertions(+), 99 deletions(-) diff --git a/benches/oneshot.rs b/benches/oneshot.rs index 9a57825d1..a42a710d9 100644 --- a/benches/oneshot.rs +++ b/benches/oneshot.rs @@ -44,17 +44,13 @@ fn bench_scan(iai: &mut Iai) { } fn bench_tokenize(iai: &mut Iai) { - iai.run(|| { - Tokens::new( - black_box(&SourceFile::detached(SRC)), - black_box(TokenMode::Markup), - ) - .count() - }); + let src = SourceFile::detached(SRC); + iai.run(|| Tokens::new(black_box(&src), black_box(TokenMode::Markup)).count()); } fn bench_parse(iai: &mut Iai) { - iai.run(|| parse(&SourceFile::detached(SRC))); + let src = SourceFile::detached(SRC); + iai.run(|| parse(&src)); } fn bench_eval(iai: &mut Iai) { diff --git a/src/parse/mod.rs b/src/parse/mod.rs index ce992834c..8775e8a17 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -54,7 +54,10 @@ where while !p.eof() 
&& f(p) { markup_node(p, &mut at_start); if let Some(node) = p.last_child() { - at_start &= matches!(node.kind(), &NodeKind::Space(_) | &NodeKind::Parbreak | &NodeKind::LineComment | &NodeKind::BlockComment); + at_start &= matches!(node.kind(), + &NodeKind::Space(_) | &NodeKind::Parbreak | + &NodeKind::LineComment | &NodeKind::BlockComment + ); } } @@ -88,22 +91,8 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { | NodeKind::Emph | NodeKind::Strong | NodeKind::Linebreak - | NodeKind::Raw(_) => p.eat(), - - NodeKind::UnicodeEscape(u) => { - if u.character.is_none() { - let src = p.peek_src(); - p.convert(NodeKind::Error( - ErrorPosition::Full, - "invalid unicode escape sequence".into(), - )); - p.start(); - p.end(NodeKind::Text(src.into())); - return; - } - - p.eat(); - } + | NodeKind::Raw(_) + | NodeKind::UnicodeEscape(_) => p.eat(), NodeKind::Eq if *at_start => heading(p), NodeKind::ListBullet if *at_start => list_node(p), @@ -503,9 +492,8 @@ fn item(p: &mut Parser) -> NodeKind { /// Convert a collection into an array, producing errors for anything other than /// expressions. fn array(p: &mut Parser, items: usize) { - p.start_with(items); p.filter_children( - 0, + p.child_count() - items, |x| match x.kind() { NodeKind::Named | NodeKind::ParameterSink => false, _ => true, @@ -522,15 +510,14 @@ fn array(p: &mut Parser, items: usize) { }, ); - p.end(NodeKind::Array) + p.convert_with(items, NodeKind::Array); } /// Convert a collection into a dictionary, producing errors for anything other /// than named pairs. 
fn dict(p: &mut Parser, items: usize) { - p.start_with(items); p.filter_children( - 0, + p.child_count() - items, |x| { x.kind() == &NodeKind::Named || x.kind().is_parenthesis() @@ -547,7 +534,7 @@ fn dict(p: &mut Parser, items: usize) { ), }, ); - p.end(NodeKind::Dict); + p.convert_with(items, NodeKind::Dict); } /// Convert a collection into a list of parameters, producing errors for @@ -684,8 +671,7 @@ fn let_expr(p: &mut Parser) { return; } - p.start_with(p.child_count() - offset); - p.end(NodeKind::Closure) + p.convert_with(p.child_count() - offset, NodeKind::Closure); } } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index e6fcc1aed..240de43d7 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -186,9 +186,27 @@ impl<'s> Parser<'s> { } pub fn convert(&mut self, kind: NodeKind) { - self.start(); - self.eat(); - self.end(kind); + let len = self.tokens.index() - self.next_start; + + self.children.push( + GreenNode::with_child( + kind, + len, + GreenData::new(self.next.clone().unwrap(), len), + ) + .into(), + ); + self.fast_forward(); + self.success = true; + } + + pub fn convert_with(&mut self, preserve: usize, kind: NodeKind) { + let preserved: Vec<_> = + self.children.drain(self.children.len() - preserve ..).collect(); + let len = preserved.iter().map(|c| c.len()).sum(); + self.children + .push(GreenNode::with_children(kind, len, preserved).into()); + self.success = true; } /// End the current node and undo its existence, inling all accumulated diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 7c500ce79..1d2e32ec5 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -200,7 +200,7 @@ impl<'s> Tokens<'s> { TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace()) }); - NodeKind::Text(resolve_string(self.s.eaten_from(start))) + NodeKind::Text(self.s.eaten_from(start).into()) } fn whitespace(&mut self) -> NodeKind { @@ -243,10 +243,16 @@ impl<'s> Tokens<'s> { let sequence: EcoString = self.s.eat_while(|c| 
c.is_ascii_alphanumeric()).into(); if self.s.eat_if('}') { - NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken { - character: resolve_hex(&sequence), - sequence, - })) + if let Some(character) = resolve_hex(&sequence) { + NodeKind::UnicodeEscape(UnicodeEscapeToken { + character, + }) + } else { + NodeKind::Error( + ErrorPosition::Full, + "invalid unicode escape sequence".into(), + ) + } } else { NodeKind::Error( ErrorPosition::End, @@ -560,35 +566,21 @@ mod tests { use Option::None; use TokenMode::{Code, Markup}; - fn UnicodeEscape(sequence: &str, terminated: bool) -> NodeKind { - if terminated { - NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken { - character: resolve_hex(sequence), - sequence: sequence.into(), - })) - } else { - NodeKind::Error(ErrorPosition::End, "expected closing brace".into()) - } + fn UnicodeEscape(character: char) -> NodeKind { + NodeKind::UnicodeEscape(UnicodeEscapeToken { character }) } - fn Raw( - text: &str, - lang: Option<&str>, - backticks_left: u8, - err_msg: Option<&str>, - block: bool, - ) -> NodeKind { - match err_msg { - None => NodeKind::Raw(Rc::new(RawToken { - text: text.into(), - lang: lang.map(Into::into), - backticks: backticks_left, - block, - })), - Some(msg) => { - NodeKind::Error(ErrorPosition::End, format!("expected {}", msg).into()) - } - } + fn Error(pos: ErrorPosition, message: &str) -> NodeKind { + NodeKind::Error(pos, message.into()) + } + + fn Raw(text: &str, lang: Option<&str>, backticks_left: u8, block: bool) -> NodeKind { + NodeKind::Raw(Rc::new(RawToken { + text: text.into(), + lang: lang.map(Into::into), + backticks: backticks_left, + block, + })) } fn Math(formula: &str, display: bool, err_msg: Option<&str>) -> NodeKind { @@ -795,16 +787,16 @@ mod tests { t!(Markup[" /"]: r#"\""# => Text(r"\"), Text("\"")); // Test basic unicode escapes. 
- t!(Markup: r"\u{}" => UnicodeEscape("", true)); - t!(Markup: r"\u{2603}" => UnicodeEscape("2603", true)); - t!(Markup: r"\u{P}" => UnicodeEscape("P", true)); + t!(Markup: r"\u{}" => Error(ErrorPosition::Full, "invalid unicode escape sequence")); + t!(Markup: r"\u{2603}" => UnicodeEscape('☃')); + t!(Markup: r"\u{P}" => Error(ErrorPosition::Full, "invalid unicode escape sequence")); // Test unclosed unicode escapes. - t!(Markup[" /"]: r"\u{" => UnicodeEscape("", false)); - t!(Markup[" /"]: r"\u{1" => UnicodeEscape("1", false)); - t!(Markup[" /"]: r"\u{26A4" => UnicodeEscape("26A4", false)); - t!(Markup[" /"]: r"\u{1Q3P" => UnicodeEscape("1Q3P", false)); - t!(Markup: r"\u{1🏕}" => UnicodeEscape("1", false), Text("🏕"), RightBrace); + t!(Markup[" /"]: r"\u{" => Error(ErrorPosition::End, "expected closing brace")); + t!(Markup[" /"]: r"\u{1" => Error(ErrorPosition::End, "expected closing brace")); + t!(Markup[" /"]: r"\u{26A4" => Error(ErrorPosition::End, "expected closing brace")); + t!(Markup[" /"]: r"\u{1Q3P" => Error(ErrorPosition::End, "expected closing brace")); + t!(Markup: r"\u{1🏕}" => Error(ErrorPosition::End, "expected closing brace"), Text("🏕"), RightBrace); } #[test] @@ -894,22 +886,22 @@ mod tests { #[test] fn test_tokenize_raw_blocks() { // Test basic raw block. - t!(Markup: "``" => Raw("", None, 1, None, false)); - t!(Markup: "`raw`" => Raw("raw", None, 1, None, false)); - t!(Markup[""]: "`]" => Raw("]", None, 1, Some("1 backtick"), false)); + t!(Markup: "``" => Raw("", None, 1, false)); + t!(Markup: "`raw`" => Raw("raw", None, 1, false)); + t!(Markup[""]: "`]" => Error(ErrorPosition::End, "expected 1 backtick")); // Test special symbols in raw block. 
- t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, None, false)); - t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, None, false), Raw(" ", None, 1, Some("1 backtick"), false)); + t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, false)); + t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, false), Error(ErrorPosition::End, "expected 1 backtick")); // Test separated closing backticks. - t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, None, false)); + t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, false)); // Test more backticks. - t!(Markup: "``nope``" => Raw("", None, 1, None, false), Text("nope"), Raw("", None, 1, None, false)); - t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, None, false)); - t!(Markup[""]: "`````👩‍🚀````noend" => Raw("````noend", Some("👩‍🚀"), 5, Some("5 backticks"), false)); - t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, None, false), Raw("", None, 1, None, false)); + t!(Markup: "``nope``" => Raw("", None, 1, false), Text("nope"), Raw("", None, 1, false)); + t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, false)); + t!(Markup[""]: "`````👩‍🚀````noend" => Error(ErrorPosition::End, "expected 5 backticks")); + t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, false), Raw("", None, 1, false)); } #[test] diff --git a/src/syntax/markup.rs b/src/syntax/markup.rs index 49b2a519e..f43a618a0 100644 --- a/src/syntax/markup.rs +++ b/src/syntax/markup.rs @@ -1,7 +1,6 @@ use super::{Expr, Ident, NodeKind, RedNode, RedRef, Span, TypedNode}; use crate::node; use crate::util::EcoString; -use std::fmt::Write; node! { /// The syntactical root capable of representing a full parsed document. 
@@ -50,14 +49,7 @@ impl TypedNode for MarkupNode { NodeKind::Strong => Some(MarkupNode::Strong), NodeKind::Emph => Some(MarkupNode::Emph), NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())), - NodeKind::UnicodeEscape(u) => Some(MarkupNode::Text(match u.character { - Some(c) => c.into(), - None => { - let mut eco = EcoString::with_capacity(u.sequence.len() + 4); - write!(&mut eco, "\\u{{{}}}", u.sequence).unwrap(); - eco - } - })), + NodeKind::UnicodeEscape(u) => Some(MarkupNode::Text(u.character.into())), NodeKind::EnDash => Some(MarkupNode::Text(EcoString::from("\u{2013}"))), NodeKind::EmDash => Some(MarkupNode::Text(EcoString::from("\u{2014}"))), NodeKind::NonBreakingSpace => { diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index afa0ab86a..9d4beb6cb 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -121,7 +121,7 @@ pub enum NodeKind { Text(EcoString), /// A slash and the letter "u" followed by a hexadecimal unicode entity /// enclosed in curly braces: `\u{1F5FA}`. - UnicodeEscape(Rc), + UnicodeEscape(UnicodeEscapeToken), /// An arbitrary number of backticks followed by inner contents, terminated /// with the same number of backticks: `` `...` ``. Raw(Rc), diff --git a/src/syntax/token.rs b/src/syntax/token.rs index 5a6214958..4f43bb4f5 100644 --- a/src/syntax/token.rs +++ b/src/syntax/token.rs @@ -33,9 +33,8 @@ pub struct MathToken { /// A unicode escape sequence token: `\u{1F5FA}`. #[derive(Debug, Clone, PartialEq)] +#[repr(transparent)] pub struct UnicodeEscapeToken { - /// The escape sequence between the braces. - pub sequence: EcoString, /// The resulting unicode character. 
- pub character: Option, + pub character: char, } From 7d34a548ccd14debe0668e23454e1ced70e485ec Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 1 Nov 2021 10:57:45 +0100 Subject: [PATCH 06/18] Reorganize syntax module --- src/parse/mod.rs | 4 +- src/syntax/{expr.rs => ast.rs} | 158 +++++- src/syntax/markup.rs | 159 ------ src/syntax/mod.rs | 899 +++++++++++++++++---------------- src/syntax/token.rs | 40 -- 5 files changed, 626 insertions(+), 634 deletions(-) rename src/syntax/{expr.rs => ast.rs} (83%) delete mode 100644 src/syntax/markup.rs delete mode 100644 src/syntax/token.rs diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 8775e8a17..22288d01a 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -520,7 +520,7 @@ fn dict(p: &mut Parser, items: usize) { p.child_count() - items, |x| { x.kind() == &NodeKind::Named - || x.kind().is_parenthesis() + || x.kind().is_paren() || x.kind() == &NodeKind::Comma || x.kind() == &NodeKind::Colon }, @@ -550,7 +550,7 @@ fn params(p: &mut Parser, count: usize, allow_parens: bool) { ), _ => false, } - || (allow_parens && x.kind().is_parenthesis()), + || (allow_parens && x.kind().is_paren()), |_| (ErrorPosition::Full, "expected identifier".into()), ); } diff --git a/src/syntax/expr.rs b/src/syntax/ast.rs similarity index 83% rename from src/syntax/expr.rs rename to src/syntax/ast.rs index 1439cbdb6..bdd0767de 100644 --- a/src/syntax/expr.rs +++ b/src/syntax/ast.rs @@ -1,8 +1,164 @@ -use super::{Ident, Markup, NodeKind, RedNode, RedRef, Span, TypedNode}; +use super::{Ident, NodeKind, RedNode, RedRef, Span, TypedNode}; use crate::geom::{AngularUnit, LengthUnit}; use crate::node; use crate::util::EcoString; +node! { + /// The syntactical root capable of representing a full parsed document. + Markup +} + +impl Markup { + pub fn nodes<'a>(&'a self) -> impl Iterator + 'a { + self.0.children().filter_map(RedRef::cast) + } +} + +/// A single piece of markup. 
+#[derive(Debug, Clone, PartialEq)] +pub enum MarkupNode { + /// Whitespace containing less than two newlines. + Space, + /// A forced line break: `\`. + Linebreak, + /// A paragraph break: Two or more newlines. + Parbreak, + /// Strong text was enabled / disabled: `*`. + Strong, + /// Emphasized text was enabled / disabled: `_`. + Emph, + /// Plain text. + Text(EcoString), + /// A raw block with optional syntax highlighting: `` `...` ``. + Raw(RawNode), + /// A section heading: `= Introduction`. + Heading(HeadingNode), + /// An item in an unordered list: `- ...`. + List(ListNode), + /// An item in an enumeration (ordered list): `1. ...`. + Enum(EnumNode), + /// An expression. + Expr(Expr), +} + +impl TypedNode for MarkupNode { + fn cast_from(node: RedRef) -> Option { + match node.kind() { + NodeKind::Space(_) => Some(MarkupNode::Space), + NodeKind::Linebreak => Some(MarkupNode::Linebreak), + NodeKind::Parbreak => Some(MarkupNode::Parbreak), + NodeKind::Strong => Some(MarkupNode::Strong), + NodeKind::Emph => Some(MarkupNode::Emph), + NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())), + NodeKind::UnicodeEscape(u) => Some(MarkupNode::Text(u.character.into())), + NodeKind::EnDash => Some(MarkupNode::Text(EcoString::from("\u{2013}"))), + NodeKind::EmDash => Some(MarkupNode::Text(EcoString::from("\u{2014}"))), + NodeKind::NonBreakingSpace => { + Some(MarkupNode::Text(EcoString::from("\u{00A0}"))) + } + NodeKind::Raw(_) => node.cast().map(MarkupNode::Raw), + NodeKind::Heading => node.cast().map(MarkupNode::Heading), + NodeKind::List => node.cast().map(MarkupNode::List), + NodeKind::Enum => node.cast().map(MarkupNode::Enum), + NodeKind::Error(_, _) => None, + _ => node.cast().map(MarkupNode::Expr), + } + } +} + +/// A raw block with optional syntax highlighting: `` `...` ``. +#[derive(Debug, Clone, PartialEq)] +pub struct RawNode { + /// An optional identifier specifying the language to syntax-highlight in. 
+ pub lang: Option, + /// The raw text, determined as the raw string between the backticks trimmed + /// according to the above rules. + pub text: EcoString, + /// Whether the element is block-level, that is, it has 3+ backticks + /// and contains at least one newline. + pub block: bool, +} + +impl TypedNode for RawNode { + fn cast_from(node: RedRef) -> Option { + match node.kind() { + NodeKind::Raw(raw) => { + let span = node.span(); + let start = span.start + raw.backticks as usize; + Some(Self { + block: raw.block, + lang: raw.lang.as_ref().and_then(|x| { + let span = Span::new(span.source, start, start + x.len()); + Ident::new(x, span) + }), + text: raw.text.clone(), + }) + } + _ => None, + } + } +} + +node! { + /// A section heading: `= Introduction`. + Heading => HeadingNode +} + +impl HeadingNode { + /// The contents of the heading. + pub fn body(&self) -> Markup { + self.0 + .cast_first_child() + .expect("heading node is missing markup body") + } + + /// The section depth (numer of equals signs). + pub fn level(&self) -> u8 { + self.0 + .children() + .find_map(|node| match node.kind() { + NodeKind::HeadingLevel(heading) => Some(*heading), + _ => None, + }) + .expect("heading node is missing heading level") + } +} + +node! { + /// An item in an unordered list: `- ...`. + List => ListNode +} + +impl ListNode { + /// The contents of the list item. + pub fn body(&self) -> Markup { + self.0.cast_first_child().expect("list node is missing body") + } +} + +node! { + /// An item in an enumeration (ordered list): `1. ...`. + Enum => EnumNode +} + +impl EnumNode { + /// The contents of the list item. + pub fn body(&self) -> Markup { + self.0.cast_first_child().expect("enumeration node is missing body") + } + + /// The number, if any. 
+ pub fn number(&self) -> Option { + self.0 + .children() + .find_map(|node| match node.kind() { + NodeKind::EnumNumbering(num) => Some(num.clone()), + _ => None, + }) + .expect("enumeration node is missing number") + } +} + /// An expression. #[derive(Debug, Clone, PartialEq)] pub enum Expr { diff --git a/src/syntax/markup.rs b/src/syntax/markup.rs deleted file mode 100644 index f43a618a0..000000000 --- a/src/syntax/markup.rs +++ /dev/null @@ -1,159 +0,0 @@ -use super::{Expr, Ident, NodeKind, RedNode, RedRef, Span, TypedNode}; -use crate::node; -use crate::util::EcoString; - -node! { - /// The syntactical root capable of representing a full parsed document. - Markup -} - -impl Markup { - pub fn nodes<'a>(&'a self) -> impl Iterator + 'a { - self.0.children().filter_map(RedRef::cast) - } -} - -/// A single piece of markup. -#[derive(Debug, Clone, PartialEq)] -pub enum MarkupNode { - /// Whitespace containing less than two newlines. - Space, - /// A forced line break: `\`. - Linebreak, - /// A paragraph break: Two or more newlines. - Parbreak, - /// Strong text was enabled / disabled: `*`. - Strong, - /// Emphasized text was enabled / disabled: `_`. - Emph, - /// Plain text. - Text(EcoString), - /// A raw block with optional syntax highlighting: `` `...` ``. - Raw(RawNode), - /// A section heading: `= Introduction`. - Heading(HeadingNode), - /// An item in an unordered list: `- ...`. - List(ListNode), - /// An item in an enumeration (ordered list): `1. ...`. - Enum(EnumNode), - /// An expression. 
- Expr(Expr), -} - -impl TypedNode for MarkupNode { - fn cast_from(node: RedRef) -> Option { - match node.kind() { - NodeKind::Space(_) => Some(MarkupNode::Space), - NodeKind::Linebreak => Some(MarkupNode::Linebreak), - NodeKind::Parbreak => Some(MarkupNode::Parbreak), - NodeKind::Strong => Some(MarkupNode::Strong), - NodeKind::Emph => Some(MarkupNode::Emph), - NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())), - NodeKind::UnicodeEscape(u) => Some(MarkupNode::Text(u.character.into())), - NodeKind::EnDash => Some(MarkupNode::Text(EcoString::from("\u{2013}"))), - NodeKind::EmDash => Some(MarkupNode::Text(EcoString::from("\u{2014}"))), - NodeKind::NonBreakingSpace => { - Some(MarkupNode::Text(EcoString::from("\u{00A0}"))) - } - NodeKind::Raw(_) => node.cast().map(MarkupNode::Raw), - NodeKind::Heading => node.cast().map(MarkupNode::Heading), - NodeKind::List => node.cast().map(MarkupNode::List), - NodeKind::Enum => node.cast().map(MarkupNode::Enum), - NodeKind::Error(_, _) => None, - _ => node.cast().map(MarkupNode::Expr), - } - } -} - -/// A raw block with optional syntax highlighting: `` `...` ``. -#[derive(Debug, Clone, PartialEq)] -pub struct RawNode { - /// An optional identifier specifying the language to syntax-highlight in. - pub lang: Option, - /// The raw text, determined as the raw string between the backticks trimmed - /// according to the above rules. - pub text: EcoString, - /// Whether the element is block-level, that is, it has 3+ backticks - /// and contains at least one newline. - pub block: bool, -} - -impl TypedNode for RawNode { - fn cast_from(node: RedRef) -> Option { - match node.kind() { - NodeKind::Raw(raw) => { - let span = node.span(); - let start = span.start + raw.backticks as usize; - Some(Self { - block: raw.block, - lang: raw.lang.as_ref().and_then(|x| { - let span = Span::new(span.source, start, start + x.len()); - Ident::new(x, span) - }), - text: raw.text.clone(), - }) - } - _ => None, - } - } -} - -node! 
{ - /// A section heading: `= Introduction`. - Heading => HeadingNode -} - -impl HeadingNode { - /// The contents of the heading. - pub fn body(&self) -> Markup { - self.0 - .cast_first_child() - .expect("heading node is missing markup body") - } - - /// The section depth (numer of equals signs). - pub fn level(&self) -> u8 { - self.0 - .children() - .find_map(|node| match node.kind() { - NodeKind::HeadingLevel(heading) => Some(*heading), - _ => None, - }) - .expect("heading node is missing heading level") - } -} - -node! { - /// An item in an unordered list: `- ...`. - List => ListNode -} - -impl ListNode { - /// The contents of the list item. - pub fn body(&self) -> Markup { - self.0.cast_first_child().expect("list node is missing body") - } -} - -node! { - /// An item in an enumeration (ordered list): `1. ...`. - Enum => EnumNode -} - -impl EnumNode { - /// The contents of the list item. - pub fn body(&self) -> Markup { - self.0.cast_first_child().expect("enumeration node is missing body") - } - - /// The number, if any. - pub fn number(&self) -> Option { - self.0 - .children() - .find_map(|node| match node.kind() { - NodeKind::EnumNumbering(num) => Some(num.clone()), - _ => None, - }) - .expect("enumeration node is missing number") - } -} diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 9d4beb6cb..9fd2b21d2 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -1,450 +1,38 @@ //! Syntax types. -mod expr; +mod ast; mod ident; -mod markup; mod pretty; mod span; -mod token; use std::fmt; use std::fmt::{Debug, Display, Formatter}; use std::mem; use std::rc::Rc; -pub use expr::*; +pub use ast::*; pub use ident::*; -pub use markup::*; pub use pretty::*; pub use span::*; -pub use token::*; use crate::geom::{AngularUnit, LengthUnit}; use crate::source::SourceId; use crate::util::EcoString; -#[derive(Debug, Clone, PartialEq)] -pub enum NodeKind { - /// A left square bracket: `[`. - LeftBracket, - /// A right square bracket: `]`. 
- RightBracket, - /// A left curly brace: `{`. - LeftBrace, - /// A right curly brace: `}`. - RightBrace, - /// A left round parenthesis: `(`. - LeftParen, - /// A right round parenthesis: `)`. - RightParen, - /// An asterisk: `*`. - Star, - /// A comma: `,`. - Comma, - /// A semicolon: `;`. - Semicolon, - /// A colon: `:`. - Colon, - /// A plus: `+`. - Plus, - /// A hyphen: `-`. - Minus, - /// A slash: `/`. - Slash, - /// A single equals sign: `=`. - Eq, - /// Two equals signs: `==`. - EqEq, - /// An exclamation mark followed by an equals sign: `!=`. - ExclEq, - /// A less-than sign: `<`. - Lt, - /// A less-than sign followed by an equals sign: `<=`. - LtEq, - /// A greater-than sign: `>`. - Gt, - /// A greater-than sign followed by an equals sign: `>=`. - GtEq, - /// A plus followed by an equals sign: `+=`. - PlusEq, - /// A hyphen followed by an equals sign: `-=`. - HyphEq, - /// An asterisk followed by an equals sign: `*=`. - StarEq, - /// A slash followed by an equals sign: `/=`. - SlashEq, - /// Two dots: `..`. - Dots, - /// An equals sign followed by a greater-than sign: `=>`. - Arrow, - /// The `not` operator. - Not, - /// The `and` operator. - And, - /// The `or` operator. - Or, - /// The `with` operator. - With, - /// The `with` expression: `with (1)`. - WithExpr, - /// The none literal: `none`. - None, - /// The auto literal: `auto`. - Auto, - /// The `let` keyword. - Let, - /// The `if` keyword. - If, - /// The `else` keyword. - Else, - /// The `for` keyword. - For, - /// The `in` keyword. - In, - /// The `while` keyword. - While, - /// The `break` keyword. - Break, - /// The `continue` keyword. - Continue, - /// The `return` keyword. - Return, - /// The `import` keyword. - Import, - /// The `include` keyword. - Include, - /// The `from` keyword. - From, - /// One or more whitespace characters. - Space(usize), - /// A consecutive non-markup string. 
- Text(EcoString), - /// A slash and the letter "u" followed by a hexadecimal unicode entity - /// enclosed in curly braces: `\u{1F5FA}`. - UnicodeEscape(UnicodeEscapeToken), - /// An arbitrary number of backticks followed by inner contents, terminated - /// with the same number of backticks: `` `...` ``. - Raw(Rc), - /// Dollar signs surrounding inner contents. - Math(Rc), - /// A numbering: `23.`. - /// - /// Can also exist without the number: `.`. - EnumNumbering(Option), - /// An identifier: `center`. - Ident(EcoString), - /// A boolean: `true`, `false`. - Bool(bool), - /// An integer: `120`. - Int(i64), - /// A floating-point number: `1.2`, `10e-4`. - Float(f64), - /// A length: `12pt`, `3cm`. - Length(f64, LengthUnit), - /// An angle: `90deg`. - Angle(f64, AngularUnit), - /// A percentage: `50%`. - /// - /// _Note_: `50%` is stored as `50.0` here, as in the corresponding - /// [literal](super::Lit::Percent). - Percentage(f64), - /// A fraction unit: `3fr`. - Fraction(f64), - /// A quoted string: `"..."`. - Str(StrToken), - /// Two slashes followed by inner contents, terminated with a newline: - /// `//\n`. - LineComment, - /// A slash and a star followed by inner contents, terminated with a star - /// and a slash: `/**/`. - /// - /// The comment can contain nested block comments. - BlockComment, - /// Tokens that appear in the wrong place. - Error(ErrorPosition, EcoString), - /// Unknown character sequences. - Unknown(EcoString), - /// Template markup. - Markup, - /// A forced line break: `\`. - Linebreak, - /// A paragraph break: Two or more newlines. - Parbreak, - /// Strong text was enabled / disabled: `*`. - Strong, - /// Emphasized text was enabled / disabled: `_`. - Emph, - /// A non-breaking space: `~`. - NonBreakingSpace, - /// An en-dash: `--`. - EnDash, - /// An em-dash: `---`. - EmDash, - /// A section heading: `= Introduction`. - Heading, - /// A heading's level: `=`, `==`, `===`, etc. 
- HeadingLevel(u8), - /// An item in an unordered list: `- ...`. - List, - /// The bullet character of an item in an unordered list: `-`. - ListBullet, - /// An item in an enumeration (ordered list): `1. ...`. - Enum, - /// An array expression: `(1, "hi", 12cm)`. - Array, - /// A dictionary expression: `(thickness: 3pt, pattern: dashed)`. - Dict, - /// A named argument: `thickness: 3pt`. - Named, - /// A template expression: `[*Hi* there!]`. - Template, - /// A grouped expression: `(1 + 2)`. - Group, - /// A block expression: `{ let x = 1; x + 2 }`. - Block, - /// A unary operation: `-x`. - Unary, - /// A binary operation: `a + b`. - Binary, - /// An invocation of a function: `f(x, y)`. - Call, - /// A function call's argument list: `(x, y)`. - CallArgs, - /// A closure expression: `(x, y) => z`. - Closure, - /// A closure's parameters: `(x, y)`. - ClosureParams, - /// A parameter sink: `..x`. - ParameterSink, - /// A for loop expression: `for x in y { ... }`. - ForExpr, - /// A while loop expression: `while x { ... }`. - WhileExpr, - /// An if expression: `if x { ... }`. - IfExpr, - /// A let expression: `let x = 1`. - LetExpr, - /// A for loop's destructuring pattern: `x` or `x, y`. - ForPattern, - /// The import expression: `import x from "foo.typ"`. - ImportExpr, - /// Items to import: `a, b, c`. - ImportItems, - /// The include expression: `include "foo.typ"`. - IncludeExpr, -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum ErrorPosition { - /// At the start of the node. - Start, - /// Over the full width of the node. - Full, - /// At the end of the node. 
- End, -} - -impl Display for NodeKind { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.pad(self.as_str()) - } -} - -impl NodeKind { - pub fn is_parenthesis(&self) -> bool { - match self { - Self::LeftParen => true, - Self::RightParen => true, - _ => false, - } - } - - pub fn is_bracket(&self) -> bool { - match self { - Self::LeftBracket => true, - Self::RightBracket => true, - _ => false, - } - } - - pub fn is_brace(&self) -> bool { - match self { - Self::LeftBrace => true, - Self::RightBrace => true, - _ => false, - } - } - - pub fn is_error(&self) -> bool { - matches!(self, NodeKind::Error(_, _)) - } - - pub fn as_str(&self) -> &'static str { - match self { - Self::LeftBracket => "opening bracket", - Self::RightBracket => "closing bracket", - Self::LeftBrace => "opening brace", - Self::RightBrace => "closing brace", - Self::LeftParen => "opening paren", - Self::RightParen => "closing paren", - Self::Star => "star", - Self::Comma => "comma", - Self::Semicolon => "semicolon", - Self::Colon => "colon", - Self::Plus => "plus", - Self::Minus => "minus", - Self::Slash => "slash", - Self::Eq => "assignment operator", - Self::EqEq => "equality operator", - Self::ExclEq => "inequality operator", - Self::Lt => "less-than operator", - Self::LtEq => "less-than or equal operator", - Self::Gt => "greater-than operator", - Self::GtEq => "greater-than or equal operator", - Self::PlusEq => "add-assign operator", - Self::HyphEq => "subtract-assign operator", - Self::StarEq => "multiply-assign operator", - Self::SlashEq => "divide-assign operator", - Self::Dots => "dots", - Self::Arrow => "arrow", - Self::Not => "operator `not`", - Self::And => "operator `and`", - Self::Or => "operator `or`", - Self::With => "operator `with`", - Self::WithExpr => "`with` expression", - Self::None => "`none`", - Self::Auto => "`auto`", - Self::Let => "keyword `let`", - Self::If => "keyword `if`", - Self::Else => "keyword `else`", - Self::For => "keyword `for`", - Self::In => 
"keyword `in`", - Self::While => "keyword `while`", - Self::Break => "keyword `break`", - Self::Continue => "keyword `continue`", - Self::Return => "keyword `return`", - Self::Import => "keyword `import`", - Self::Include => "keyword `include`", - Self::From => "keyword `from`", - Self::Space(_) => "space", - Self::Math(_) => "math formula", - Self::EnumNumbering(_) => "numbering", - Self::Str(_) => "string", - Self::LineComment => "line comment", - Self::BlockComment => "block comment", - Self::Markup => "markup", - Self::Linebreak => "forced linebreak", - Self::Parbreak => "paragraph break", - Self::Strong => "strong", - Self::Emph => "emphasis", - Self::Text(_) => "text", - Self::NonBreakingSpace => "non-breaking space", - Self::EnDash => "en dash", - Self::EmDash => "em dash", - Self::UnicodeEscape(_) => "unicode escape sequence", - Self::Raw(_) => "raw block", - Self::Heading => "heading", - Self::HeadingLevel(_) => "heading level", - Self::List => "list", - Self::ListBullet => "list bullet", - Self::Enum => "enum", - Self::Ident(_) => "identifier", - Self::Bool(_) => "boolean", - Self::Int(_) => "integer", - Self::Float(_) => "float", - Self::Length(_, _) => "length", - Self::Angle(_, _) => "angle", - Self::Percentage(_) => "percentage", - Self::Fraction(_) => "`fr` value", - Self::Array => "array", - Self::Dict => "dictionary", - Self::Named => "named argument", - Self::Template => "template", - Self::Group => "group", - Self::Block => "block", - Self::Unary => "unary expression", - Self::Binary => "binary expression", - Self::Call => "call", - Self::CallArgs => "call arguments", - Self::Closure => "closure", - Self::ClosureParams => "closure parameters", - Self::ParameterSink => "parameter sink", - Self::ForExpr => "for-loop expression", - Self::WhileExpr => "while-loop expression", - Self::IfExpr => "if expression", - Self::LetExpr => "let expression", - Self::ForPattern => "for-loop destructuring pattern", - Self::ImportExpr => "import expression", - 
Self::ImportItems => "import items", - Self::IncludeExpr => "include expression", - Self::Unknown(src) => match src.as_str() { - "*/" => "end of block comment", - _ => "invalid token", - }, - Self::Error(_, _) => "parse error", - } - } -} - -/// A syntactical node. -#[derive(Clone, PartialEq)] -pub struct GreenNode { - /// Node metadata. - data: GreenData, - /// This node's children, losslessly make up this node. - children: Vec, -} - -/// Data shared between [`GreenNode`]s and [`GreenToken`]s. -#[derive(Clone, PartialEq)] -pub struct GreenData { - /// What kind of node this is (each kind would have its own struct in a - /// strongly typed AST). - kind: NodeKind, - /// The byte length of the node in the source. - len: usize, - /// Whether this node or any of its children are erroneous. - erroneous: bool, -} - -impl GreenData { - pub fn new(kind: NodeKind, len: usize) -> Self { - Self { len, erroneous: kind.is_error(), kind } - } - - pub fn kind(&self) -> &NodeKind { - &self.kind - } - - pub fn len(&self) -> usize { - self.len - } - - pub fn erroneous(&self) -> bool { - self.erroneous - } -} - -impl From for Green { - fn from(token: GreenData) -> Self { - Self::Token(token) - } -} - /// Children of a [`GreenNode`]. #[derive(Clone, PartialEq)] pub enum Green { - /// A terminal owned token. - Token(GreenData), /// A non-terminal node in an Rc. Node(Rc), + /// A terminal owned token. 
+ Token(GreenData), } impl Green { fn data(&self) -> &GreenData { match self { - Green::Token(t) => &t, Green::Node(n) => &n.data, + Green::Token(t) => &t, } } @@ -462,12 +50,41 @@ impl Green { pub fn children(&self) -> &[Green] { match self { - Green::Token(_) => &[], Green::Node(n) => &n.children(), + Green::Token(_) => &[], } } } +impl Default for Green { + fn default() -> Self { + Self::Token(GreenData::new(NodeKind::None, 0)) + } +} + +impl Debug for Green { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{:?}: {}", self.kind(), self.len())?; + if let Self::Node(n) = self { + if !n.children.is_empty() { + f.write_str(" ")?; + f.debug_list().entries(&n.children).finish()?; + } + } + + Ok(()) + } +} + +/// A syntactical node. +#[derive(Clone, PartialEq)] +pub struct GreenNode { + /// Node metadata. + data: GreenData, + /// This node's children, losslessly make up this node. + children: Vec, +} + impl GreenNode { pub fn new(kind: NodeKind, len: usize) -> Self { Self { @@ -503,23 +120,39 @@ impl From> for Green { } } -impl Default for Green { - fn default() -> Self { - Self::Token(GreenData::new(NodeKind::None, 0)) +/// Data shared between [`GreenNode`]s and [`GreenToken`]s. +#[derive(Clone, PartialEq)] +pub struct GreenData { + /// What kind of node this is (each kind would have its own struct in a + /// strongly typed AST). + kind: NodeKind, + /// The byte length of the node in the source. + len: usize, + /// Whether this node or any of its children are erroneous. 
+ erroneous: bool, +} + +impl GreenData { + pub fn new(kind: NodeKind, len: usize) -> Self { + Self { len, erroneous: kind.is_error(), kind } + } + + pub fn kind(&self) -> &NodeKind { + &self.kind + } + + pub fn len(&self) -> usize { + self.len + } + + pub fn erroneous(&self) -> bool { + self.erroneous } } -impl Debug for Green { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{:?}: {}", self.kind(), self.len())?; - if let Self::Node(n) = self { - if !n.children.is_empty() { - f.write_str(" ")?; - f.debug_list().entries(&n.children).finish()?; - } - } - - Ok(()) +impl From for Green { + fn from(token: GreenData) -> Self { + Self::Token(token) } } @@ -678,6 +311,408 @@ pub trait TypedNode: Sized { fn cast_from(value: RedRef) -> Option; } +#[derive(Debug, Clone, PartialEq)] +pub enum NodeKind { + /// A left square bracket: `[`. + LeftBracket, + /// A right square bracket: `]`. + RightBracket, + /// A left curly brace: `{`. + LeftBrace, + /// A right curly brace: `}`. + RightBrace, + /// A left round parenthesis: `(`. + LeftParen, + /// A right round parenthesis: `)`. + RightParen, + /// An asterisk: `*`. + Star, + /// A comma: `,`. + Comma, + /// A semicolon: `;`. + Semicolon, + /// A colon: `:`. + Colon, + /// A plus: `+`. + Plus, + /// A hyphen: `-`. + Minus, + /// A slash: `/`. + Slash, + /// A single equals sign: `=`. + Eq, + /// Two equals signs: `==`. + EqEq, + /// An exclamation mark followed by an equals sign: `!=`. + ExclEq, + /// A less-than sign: `<`. + Lt, + /// A less-than sign followed by an equals sign: `<=`. + LtEq, + /// A greater-than sign: `>`. + Gt, + /// A greater-than sign followed by an equals sign: `>=`. + GtEq, + /// A plus followed by an equals sign: `+=`. + PlusEq, + /// A hyphen followed by an equals sign: `-=`. + HyphEq, + /// An asterisk followed by an equals sign: `*=`. + StarEq, + /// A slash followed by an equals sign: `/=`. + SlashEq, + /// The `not` operator. + Not, + /// The `and` operator. 
+ And, + /// The `or` operator. + Or, + /// The `with` operator. + With, + /// Two dots: `..`. + Dots, + /// An equals sign followed by a greater-than sign: `=>`. + Arrow, + /// The none literal: `none`. + None, + /// The auto literal: `auto`. + Auto, + /// The `let` keyword. + Let, + /// The `if` keyword. + If, + /// The `else` keyword. + Else, + /// The `for` keyword. + For, + /// The `in` keyword. + In, + /// The `while` keyword. + While, + /// The `break` keyword. + Break, + /// The `continue` keyword. + Continue, + /// The `return` keyword. + Return, + /// The `import` keyword. + Import, + /// The `include` keyword. + Include, + /// The `from` keyword. + From, + /// Template markup. + Markup, + /// One or more whitespace characters. + Space(usize), + /// A forced line break: `\`. + Linebreak, + /// A paragraph break: Two or more newlines. + Parbreak, + /// A consecutive non-markup string. + Text(EcoString), + /// A non-breaking space: `~`. + NonBreakingSpace, + /// An en-dash: `--`. + EnDash, + /// An em-dash: `---`. + EmDash, + /// A slash and the letter "u" followed by a hexadecimal unicode entity + /// enclosed in curly braces: `\u{1F5FA}`. + UnicodeEscape(UnicodeEscapeToken), + /// Strong text was enabled / disabled: `*`. + Strong, + /// Emphasized text was enabled / disabled: `_`. + Emph, + /// A section heading: `= Introduction`. + Heading, + /// A heading's level: `=`, `==`, `===`, etc. + HeadingLevel(u8), + /// An item in an enumeration (ordered list): `1. ...`. + Enum, + /// A numbering: `23.`. + /// + /// Can also exist without the number: `.`. + EnumNumbering(Option), + /// An item in an unordered list: `- ...`. + List, + /// The bullet character of an item in an unordered list: `-`. + ListBullet, + /// An arbitrary number of backticks followed by inner contents, terminated + /// with the same number of backticks: `` `...` ``. + Raw(Rc), + /// Dollar signs surrounding inner contents. + Math(Rc), + /// An identifier: `center`. 
+ Ident(EcoString), + /// A boolean: `true`, `false`. + Bool(bool), + /// An integer: `120`. + Int(i64), + /// A floating-point number: `1.2`, `10e-4`. + Float(f64), + /// A length: `12pt`, `3cm`. + Length(f64, LengthUnit), + /// An angle: `90deg`. + Angle(f64, AngularUnit), + /// A percentage: `50%`. + /// + /// _Note_: `50%` is stored as `50.0` here, as in the corresponding + /// [literal](super::Lit::Percent). + Percentage(f64), + /// A fraction unit: `3fr`. + Fraction(f64), + /// A quoted string: `"..."`. + Str(StrToken), + /// An array expression: `(1, "hi", 12cm)`. + Array, + /// A dictionary expression: `(thickness: 3pt, pattern: dashed)`. + Dict, + /// A named argument: `thickness: 3pt`. + Named, + /// A grouped expression: `(1 + 2)`. + Group, + /// A unary operation: `-x`. + Unary, + /// A binary operation: `a + b`. + Binary, + /// An invocation of a function: `f(x, y)`. + Call, + /// A function call's argument list: `(x, y)`. + CallArgs, + /// A closure expression: `(x, y) => z`. + Closure, + /// A closure's parameters: `(x, y)`. + ClosureParams, + /// A parameter sink: `..x`. + ParameterSink, + /// A template expression: `[*Hi* there!]`. + Template, + /// A block expression: `{ let x = 1; x + 2 }`. + Block, + /// A for loop expression: `for x in y { ... }`. + ForExpr, + /// A while loop expression: `while x { ... }`. + WhileExpr, + /// An if expression: `if x { ... }`. + IfExpr, + /// A let expression: `let x = 1`. + LetExpr, + /// The `with` expression: `with (1)`. + WithExpr, + /// A for loop's destructuring pattern: `x` or `x, y`. + ForPattern, + /// The import expression: `import x from "foo.typ"`. + ImportExpr, + /// Items to import: `a, b, c`. + ImportItems, + /// The include expression: `include "foo.typ"`. + IncludeExpr, + /// Two slashes followed by inner contents, terminated with a newline: + /// `//\n`. + LineComment, + /// A slash and a star followed by inner contents, terminated with a star + /// and a slash: `/**/`. 
+ /// + /// The comment can contain nested block comments. + BlockComment, + /// Tokens that appear in the wrong place. + Error(ErrorPosition, EcoString), + /// Unknown character sequences. + Unknown(EcoString), +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ErrorPosition { + /// At the start of the node. + Start, + /// Over the full width of the node. + Full, + /// At the end of the node. + End, +} + +/// A quoted string token: `"..."`. +#[derive(Debug, Clone, PartialEq)] +#[repr(transparent)] +pub struct StrToken { + /// The string inside the quotes. + pub string: EcoString, +} + +/// A raw block token: `` `...` ``. +#[derive(Debug, Clone, PartialEq)] +pub struct RawToken { + /// The raw text in the block. + pub text: EcoString, + /// The programming language of the raw text. + pub lang: Option, + /// The number of opening backticks. + pub backticks: u8, + /// Whether to display this as a block. + pub block: bool, +} + +/// A math formula token: `$2pi + x$` or `$[f'(x) = x^2]$`. +#[derive(Debug, Clone, PartialEq)] +pub struct MathToken { + /// The formula between the dollars. + pub formula: EcoString, + /// Whether the formula is display-level, that is, it is surrounded by + /// `$[..]`. + pub display: bool, +} + +/// A unicode escape sequence token: `\u{1F5FA}`. +#[derive(Debug, Clone, PartialEq)] +#[repr(transparent)] +pub struct UnicodeEscapeToken { + /// The resulting unicode character. 
+ pub character: char, +} + +impl Display for NodeKind { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.pad(self.as_str()) + } +} + +impl NodeKind { + pub fn is_paren(&self) -> bool { + match self { + Self::LeftParen => true, + Self::RightParen => true, + _ => false, + } + } + + pub fn is_bracket(&self) -> bool { + match self { + Self::LeftBracket => true, + Self::RightBracket => true, + _ => false, + } + } + + pub fn is_brace(&self) -> bool { + match self { + Self::LeftBrace => true, + Self::RightBrace => true, + _ => false, + } + } + + pub fn is_error(&self) -> bool { + matches!(self, NodeKind::Error(_, _)) + } + + pub fn as_str(&self) -> &'static str { + match self { + Self::LeftBracket => "opening bracket", + Self::RightBracket => "closing bracket", + Self::LeftBrace => "opening brace", + Self::RightBrace => "closing brace", + Self::LeftParen => "opening paren", + Self::RightParen => "closing paren", + Self::Star => "star", + Self::Comma => "comma", + Self::Semicolon => "semicolon", + Self::Colon => "colon", + Self::Plus => "plus", + Self::Minus => "minus", + Self::Slash => "slash", + Self::Eq => "assignment operator", + Self::EqEq => "equality operator", + Self::ExclEq => "inequality operator", + Self::Lt => "less-than operator", + Self::LtEq => "less-than or equal operator", + Self::Gt => "greater-than operator", + Self::GtEq => "greater-than or equal operator", + Self::PlusEq => "add-assign operator", + Self::HyphEq => "subtract-assign operator", + Self::StarEq => "multiply-assign operator", + Self::SlashEq => "divide-assign operator", + Self::Not => "operator `not`", + Self::And => "operator `and`", + Self::Or => "operator `or`", + Self::With => "operator `with`", + Self::Dots => "dots", + Self::Arrow => "arrow", + Self::None => "`none`", + Self::Auto => "`auto`", + Self::Let => "keyword `let`", + Self::If => "keyword `if`", + Self::Else => "keyword `else`", + Self::For => "keyword `for`", + Self::In => "keyword `in`", + Self::While => 
"keyword `while`", + Self::Break => "keyword `break`", + Self::Continue => "keyword `continue`", + Self::Return => "keyword `return`", + Self::Import => "keyword `import`", + Self::Include => "keyword `include`", + Self::From => "keyword `from`", + Self::Markup => "markup", + Self::Space(_) => "space", + Self::Linebreak => "forced linebreak", + Self::Parbreak => "paragraph break", + Self::Text(_) => "text", + Self::NonBreakingSpace => "non-breaking space", + Self::EnDash => "en dash", + Self::EmDash => "em dash", + Self::UnicodeEscape(_) => "unicode escape sequence", + Self::Strong => "strong", + Self::Emph => "emphasis", + Self::Heading => "heading", + Self::HeadingLevel(_) => "heading level", + Self::Enum => "enumeration item", + Self::EnumNumbering(_) => "enumeration item numbering", + Self::List => "list item", + Self::ListBullet => "list bullet", + Self::Raw(_) => "raw block", + Self::Math(_) => "math formula", + Self::Ident(_) => "identifier", + Self::Bool(_) => "boolean", + Self::Int(_) => "integer", + Self::Float(_) => "float", + Self::Length(_, _) => "length", + Self::Angle(_, _) => "angle", + Self::Percentage(_) => "percentage", + Self::Fraction(_) => "`fr` value", + Self::Str(_) => "string", + Self::Array => "array", + Self::Dict => "dictionary", + Self::Named => "named argument", + Self::Group => "group", + Self::Unary => "unary expression", + Self::Binary => "binary expression", + Self::Call => "call", + Self::CallArgs => "call arguments", + Self::Closure => "closure", + Self::ClosureParams => "closure parameters", + Self::ParameterSink => "parameter sink", + Self::Template => "template", + Self::Block => "block", + Self::ForExpr => "for-loop expression", + Self::WhileExpr => "while-loop expression", + Self::IfExpr => "`if` expression", + Self::LetExpr => "`let` expression", + Self::WithExpr => "`with` expression", + Self::ForPattern => "for-loop destructuring pattern", + Self::ImportExpr => "`import` expression", + Self::ImportItems => "import items", 
+ Self::IncludeExpr => "`include` expression", + Self::LineComment => "line comment", + Self::BlockComment => "block comment", + Self::Error(_, _) => "parse error", + Self::Unknown(src) => match src.as_str() { + "*/" => "end of block comment", + _ => "invalid token", + }, + } + } +} + #[macro_export] macro_rules! node { ($(#[$attr:meta])* $name:ident) => { diff --git a/src/syntax/token.rs b/src/syntax/token.rs deleted file mode 100644 index 4f43bb4f5..000000000 --- a/src/syntax/token.rs +++ /dev/null @@ -1,40 +0,0 @@ -use crate::util::EcoString; - -/// A quoted string token: `"..."`. -#[derive(Debug, Clone, PartialEq)] -#[repr(transparent)] -pub struct StrToken { - /// The string inside the quotes. - pub string: EcoString, -} - -/// A raw block token: `` `...` ``. -#[derive(Debug, Clone, PartialEq)] -pub struct RawToken { - /// The raw text in the block. - pub text: EcoString, - /// The programming language of the raw text. - pub lang: Option, - /// The number of opening backticks. - pub backticks: u8, - /// Whether to display this as a block. - pub block: bool, -} - -/// A math formula token: `$2pi + x$` or `$[f'(x) = x^2]$`. -#[derive(Debug, Clone, PartialEq)] -pub struct MathToken { - /// The formula between the dollars. - pub formula: EcoString, - /// Whether the formula is display-level, that is, it is surrounded by - /// `$[..]`. - pub display: bool, -} - -/// A unicode escape sequence token: `\u{1F5FA}`. -#[derive(Debug, Clone, PartialEq)] -#[repr(transparent)] -pub struct UnicodeEscapeToken { - /// The resulting unicode character. 
- pub character: char, -} From 49fb3cd4e2a5d6997ad4046d3514f154d8c866dd Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 1 Nov 2021 13:03:18 +0100 Subject: [PATCH 07/18] Code Review: Life is Like a Box of Iterators --- benches/oneshot.rs | 8 +-- src/parse/mod.rs | 3 +- src/parse/parser.rs | 24 +++---- src/parse/resolve.rs | 8 +-- src/parse/scanner.rs | 10 +++ src/parse/tokens.rs | 30 ++++----- src/source.rs | 20 ++---- src/syntax/ast.rs | 33 ++++++++- src/syntax/mod.rs | 156 +++++++++++++++++++------------------------ 9 files changed, 150 insertions(+), 142 deletions(-) diff --git a/benches/oneshot.rs b/benches/oneshot.rs index a42a710d9..63f201ac5 100644 --- a/benches/oneshot.rs +++ b/benches/oneshot.rs @@ -6,7 +6,7 @@ use typst::eval::eval; use typst::layout::layout; use typst::loading::MemLoader; use typst::parse::{parse, Scanner, TokenMode, Tokens}; -use typst::source::{SourceFile, SourceId}; +use typst::source::SourceId; use typst::Context; const SRC: &str = include_str!("bench.typ"); @@ -44,13 +44,11 @@ fn bench_scan(iai: &mut Iai) { } fn bench_tokenize(iai: &mut Iai) { - let src = SourceFile::detached(SRC); - iai.run(|| Tokens::new(black_box(&src), black_box(TokenMode::Markup)).count()); + iai.run(|| Tokens::new(black_box(&SRC), black_box(TokenMode::Markup)).count()); } fn bench_parse(iai: &mut Iai) { - let src = SourceFile::detached(SRC); - iai.run(|| parse(&src)); + iai.run(|| parse(&SRC)); } fn bench_eval(iai: &mut Iai) { diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 22288d01a..c6def4dcc 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -12,12 +12,11 @@ pub use tokens::*; use std::rc::Rc; -use crate::source::SourceFile; use crate::syntax::*; use crate::util::EcoString; /// Parse a source file. 
-pub fn parse(source: &SourceFile) -> Rc { +pub fn parse(source: &str) -> Rc { let mut p = Parser::new(source); markup(&mut p); p.finish() diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 240de43d7..374e7c09f 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,15 +1,14 @@ use std::ops::Range; use std::rc::Rc; -use super::{TokenMode, Tokens}; -use crate::source::{SourceFile, SourceId}; +use super::{is_newline, TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; /// A convenient token-based parser. pub struct Parser<'s> { /// The parsed file. - source: &'s SourceFile, + src: &'s str, /// An iterator over the source tokens. tokens: Tokens<'s>, /// The stack of open groups. @@ -61,11 +60,11 @@ pub enum Group { impl<'s> Parser<'s> { /// Create a new parser for the source string. - pub fn new(source: &'s SourceFile) -> Self { - let mut tokens = Tokens::new(source, TokenMode::Markup); + pub fn new(src: &'s str) -> Self { + let mut tokens = Tokens::new(src, TokenMode::Markup); let next = tokens.next(); Self { - source, + src, tokens, groups: vec![], next: next.clone(), @@ -78,11 +77,6 @@ impl<'s> Parser<'s> { } } - /// The id of the parsed source file. - pub fn id(&self) -> SourceId { - self.source.id() - } - /// Start a nested node. /// /// Each start call has to be matched with a call to `end`, @@ -366,12 +360,16 @@ impl<'s> Parser<'s> { /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { - self.source.byte_to_column(index).unwrap() + self.src[.. index] + .chars() + .rev() + .take_while(|&c| !is_newline(c)) + .count() } /// Slice out part of the source string. pub fn get(&self, range: Range) -> &'s str { - self.source.get(range).unwrap() + self.src.get(range).unwrap() } /// Continue parsing in a group. 
diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index 8d4c04d49..3fab98a4b 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -1,5 +1,5 @@ use super::{is_newline, Scanner}; -use crate::syntax::RawToken; +use crate::syntax::RawData; use crate::util::EcoString; /// Resolve all escape sequences in a string. @@ -46,18 +46,18 @@ pub fn resolve_hex(sequence: &str) -> Option { } /// Resolve the language tag and trims the raw text. -pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawToken { +pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawData { if backticks > 1 { let (tag, inner) = split_at_lang_tag(text); let (text, block) = trim_and_split_raw(column, inner); - RawToken { + RawData { lang: Some(tag.into()), text: text.into(), backticks, block, } } else { - RawToken { + RawData { lang: None, text: split_lines(text).join("\n").into(), backticks, diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs index 8e3e42782..edf28e179 100644 --- a/src/parse/scanner.rs +++ b/src/parse/scanner.rs @@ -106,6 +106,16 @@ impl<'s> Scanner<'s> { self.index } + /// The column index of a given index in the source string. + #[inline] + pub fn column(&self, index: usize) -> usize { + self.src[.. index] + .chars() + .rev() + .take_while(|&c| !is_newline(c)) + .count() + } + /// Jump to an index in the source string. #[inline] pub fn jump(&mut self, index: usize) { diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 1d2e32ec5..ef2678d4c 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -1,7 +1,6 @@ use super::{is_newline, resolve_raw, Scanner}; use crate::geom::{AngularUnit, LengthUnit}; use crate::parse::resolve::{resolve_hex, resolve_string}; -use crate::source::SourceFile; use crate::syntax::*; use crate::util::EcoString; @@ -9,7 +8,6 @@ use std::rc::Rc; /// An iterator over the tokens of a string of source code. 
pub struct Tokens<'s> { - source: &'s SourceFile, s: Scanner<'s>, mode: TokenMode, } @@ -26,12 +24,8 @@ pub enum TokenMode { impl<'s> Tokens<'s> { /// Create a new token iterator with the given mode. #[inline] - pub fn new(source: &'s SourceFile, mode: TokenMode) -> Self { - Self { - s: Scanner::new(source.src()), - source, - mode, - } + pub fn new(source: &'s str, mode: TokenMode) -> Self { + Self { s: Scanner::new(source), mode } } /// Get the current token mode. @@ -244,7 +238,7 @@ impl<'s> Tokens<'s> { if self.s.eat_if('}') { if let Some(character) = resolve_hex(&sequence) { - NodeKind::UnicodeEscape(UnicodeEscapeToken { + NodeKind::UnicodeEscape(UnicodeEscapeData { character, }) } else { @@ -314,7 +308,7 @@ impl<'s> Tokens<'s> { } fn raw(&mut self) -> NodeKind { - let column = self.source.byte_to_column(self.s.index() - 1).unwrap(); + let column = self.s.column(self.s.index() - 1); let mut backticks = 1; while self.s.eat_if('`') && backticks < u8::MAX { backticks += 1; @@ -322,7 +316,7 @@ impl<'s> Tokens<'s> { // Special case for empty inline block. if backticks == 2 { - return NodeKind::Raw(Rc::new(RawToken { + return NodeKind::Raw(Rc::new(RawData { text: EcoString::new(), lang: None, backticks: 1, @@ -397,7 +391,7 @@ impl<'s> Tokens<'s> { }; if terminated { - NodeKind::Math(Rc::new(MathToken { + NodeKind::Math(Rc::new(MathData { formula: self.s.get(start .. 
end).into(), display, })) @@ -492,7 +486,7 @@ impl<'s> Tokens<'s> { } })); if self.s.eat_if('"') { - NodeKind::Str(StrToken { string }) + NodeKind::Str(StrData { string }) } else { NodeKind::Error(ErrorPosition::End, "expected quote".into()) } @@ -567,7 +561,7 @@ mod tests { use TokenMode::{Code, Markup}; fn UnicodeEscape(character: char) -> NodeKind { - NodeKind::UnicodeEscape(UnicodeEscapeToken { character }) + NodeKind::UnicodeEscape(UnicodeEscapeData { character }) } fn Error(pos: ErrorPosition, message: &str) -> NodeKind { @@ -575,7 +569,7 @@ mod tests { } fn Raw(text: &str, lang: Option<&str>, backticks_left: u8, block: bool) -> NodeKind { - NodeKind::Raw(Rc::new(RawToken { + NodeKind::Raw(Rc::new(RawData { text: text.into(), lang: lang.map(Into::into), backticks: backticks_left, @@ -586,7 +580,7 @@ mod tests { fn Math(formula: &str, display: bool, err_msg: Option<&str>) -> NodeKind { match err_msg { None => { - NodeKind::Math(Rc::new(MathToken { formula: formula.into(), display })) + NodeKind::Math(Rc::new(MathData { formula: formula.into(), display })) } Some(msg) => NodeKind::Error( ErrorPosition::End, @@ -597,7 +591,7 @@ mod tests { fn Str(string: &str, terminated: bool) -> NodeKind { if terminated { - NodeKind::Str(StrToken { string: string.into() }) + NodeKind::Str(StrData { string: string.into() }) } else { NodeKind::Error(ErrorPosition::End, "expected quote".into()) } @@ -687,7 +681,7 @@ mod tests { }}; (@$mode:ident: $src:expr => $($token:expr),*) => {{ let src = $src; - let found = Tokens::new(&SourceFile::detached(src.clone()), $mode).collect::>(); + let found = Tokens::new(&src, $mode).collect::>(); let expected = vec![$($token.clone()),*]; check(&src, found, expected); }}; diff --git a/src/source.rs b/src/source.rs index e3803f575..3b7212514 100644 --- a/src/source.rs +++ b/src/source.rs @@ -8,10 +8,10 @@ use std::rc::Rc; use serde::{Deserialize, Serialize}; -use crate::diag::{Error, TypResult}; +use crate::diag::TypResult; use 
crate::loading::{FileHash, Loader}; use crate::parse::{is_newline, parse, Scanner}; -use crate::syntax::{GreenNode, Markup, NodeKind, RedNode}; +use crate::syntax::{GreenNode, Markup, RedNode}; use crate::util::PathExt; #[cfg(feature = "codespan-reporting")] @@ -134,28 +134,22 @@ impl SourceFile { pub fn new(id: SourceId, path: &Path, src: String) -> Self { let mut line_starts = vec![0]; line_starts.extend(newlines(&src)); - let mut init = Self { + Self { id, path: path.normalize(), + root: parse(&src), src, line_starts, - root: Rc::new(GreenNode::new(NodeKind::Markup, 0)), - }; - - let root = parse(&init); - init.root = root; - init + } } pub fn ast(&self) -> TypResult { let red = RedNode::new_root(self.root.clone(), self.id); let errors = red.errors(); if errors.is_empty() { - Ok(red.as_ref().cast().unwrap()) + Ok(red.cast().unwrap()) } else { - Err(Box::new( - errors.into_iter().map(|(span, msg)| Error::new(span, msg)).collect(), - )) + Err(Box::new(errors)) } } diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index bdd0767de..6ca271a96 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -1,8 +1,39 @@ use super::{Ident, NodeKind, RedNode, RedRef, Span, TypedNode}; use crate::geom::{AngularUnit, LengthUnit}; -use crate::node; use crate::util::EcoString; +macro_rules! node { + ($(#[$attr:meta])* $name:ident) => { + node!{$(#[$attr])* $name => $name} + }; + ($(#[$attr:meta])* $variant:ident => $name:ident) => { + #[derive(Debug, Clone, PartialEq)] + #[repr(transparent)] + $(#[$attr])* + pub struct $name(RedNode); + + impl TypedNode for $name { + fn cast_from(node: RedRef) -> Option { + if node.kind() != &NodeKind::$variant { + return None; + } + + Some(Self(node.own())) + } + } + + impl $name { + pub fn span(&self) -> Span { + self.0.span() + } + + pub fn underlying(&self) -> RedRef { + self.0.as_ref() + } + } + }; +} + node! { /// The syntactical root capable of representing a full parsed document. 
Markup diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 9fd2b21d2..ca41d33f7 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -15,6 +15,7 @@ pub use ident::*; pub use pretty::*; pub use span::*; +use crate::diag::Error; use crate::geom::{AngularUnit, LengthUnit}; use crate::source::SourceId; use crate::util::EcoString; @@ -94,9 +95,9 @@ impl GreenNode { } pub fn with_children(kind: NodeKind, len: usize, children: Vec) -> Self { - let mut meta = GreenData::new(kind, len); - meta.erroneous |= children.iter().any(|c| c.erroneous()); - Self { data: meta, children } + let mut data = GreenData::new(kind, len); + data.erroneous |= children.iter().any(|c| c.erroneous()); + Self { data, children } } pub fn with_child(kind: NodeKind, len: usize, child: impl Into) -> Self { @@ -180,6 +181,10 @@ impl<'a> RedRef<'a> { Span::new(self.id, self.offset, self.offset + self.green.len()) } + pub fn len(&self) -> usize { + self.green.len() + } + pub fn cast(self) -> Option where T: TypedNode, @@ -205,6 +210,29 @@ impl<'a> RedRef<'a> { }) } + pub fn errors(&self) -> Vec { + if !self.green.erroneous() { + return vec![]; + } + + match self.kind() { + NodeKind::Error(pos, msg) => { + let span = match pos { + ErrorPosition::Start => self.span().at_start(), + ErrorPosition::Full => self.span(), + ErrorPosition::End => self.span().at_end(), + }; + + vec![Error::new(span, msg.to_string())] + } + _ => self + .children() + .filter(|red| red.green.erroneous()) + .flat_map(|red| red.errors()) + .collect(), + } + } + pub(crate) fn typed_child(&self, kind: &NodeKind) -> Option { self.children() .find(|x| mem::discriminant(x.kind()) == mem::discriminant(kind)) @@ -219,6 +247,18 @@ impl<'a> RedRef<'a> { } } +impl Debug for RedRef<'_> { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{:?}: {:?}", self.kind(), self.span())?; + let mut children = self.children().peekable(); + if children.peek().is_some() { + f.write_str(" ")?; + 
f.debug_list().entries(children.map(RedRef::own)).finish()?; + } + Ok(()) + } +} + #[derive(Clone, PartialEq)] pub struct RedNode { id: SourceId, @@ -231,12 +271,27 @@ impl RedNode { Self { id, offset: 0, green: root.into() } } + pub fn as_ref<'a>(&'a self) -> RedRef<'a> { + RedRef { + id: self.id, + offset: self.offset, + green: &self.green, + } + } + pub fn span(&self) -> Span { self.as_ref().span() } pub fn len(&self) -> usize { - self.green.len() + self.as_ref().len() + } + + pub fn cast(self) -> Option + where + T: TypedNode, + { + T::cast_from(self.as_ref()) } pub fn kind(&self) -> &NodeKind { @@ -247,36 +302,8 @@ impl RedNode { self.as_ref().children() } - pub fn errors(&self) -> Vec<(Span, EcoString)> { - if !self.green.erroneous() { - return vec![]; - } - - match self.kind() { - NodeKind::Error(pos, msg) => { - let span = match pos { - ErrorPosition::Start => self.span().at_start(), - ErrorPosition::Full => self.span(), - ErrorPosition::End => self.span().at_end(), - }; - - vec![(span, msg.clone())] - } - _ => self - .as_ref() - .children() - .filter(|red| red.green.erroneous()) - .flat_map(|red| red.own().errors()) - .collect(), - } - } - - pub fn as_ref<'a>(&'a self) -> RedRef<'a> { - RedRef { - id: self.id, - offset: self.offset, - green: &self.green, - } + pub fn errors<'a>(&'a self) -> Vec { + self.as_ref().errors() } pub(crate) fn typed_child(&self, kind: &NodeKind) -> Option { @@ -294,15 +321,7 @@ impl RedNode { impl Debug for RedNode { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{:?}: {:?}", self.kind(), self.span())?; - let children = self.as_ref().children().collect::>(); - if !children.is_empty() { - f.write_str(" ")?; - f.debug_list() - .entries(children.into_iter().map(RedRef::own)) - .finish()?; - } - Ok(()) + self.as_ref().fmt(f) } } @@ -419,7 +438,7 @@ pub enum NodeKind { EmDash, /// A slash and the letter "u" followed by a hexadecimal unicode entity /// enclosed in curly braces: `\u{1F5FA}`. 
- UnicodeEscape(UnicodeEscapeToken), + UnicodeEscape(UnicodeEscapeData), /// Strong text was enabled / disabled: `*`. Strong, /// Emphasized text was enabled / disabled: `_`. @@ -440,9 +459,9 @@ pub enum NodeKind { ListBullet, /// An arbitrary number of backticks followed by inner contents, terminated /// with the same number of backticks: `` `...` ``. - Raw(Rc), + Raw(Rc), /// Dollar signs surrounding inner contents. - Math(Rc), + Math(Rc), /// An identifier: `center`. Ident(EcoString), /// A boolean: `true`, `false`. @@ -463,7 +482,7 @@ pub enum NodeKind { /// A fraction unit: `3fr`. Fraction(f64), /// A quoted string: `"..."`. - Str(StrToken), + Str(StrData), /// An array expression: `(1, "hi", 12cm)`. Array, /// A dictionary expression: `(thickness: 3pt, pattern: dashed)`. @@ -534,15 +553,14 @@ pub enum ErrorPosition { /// A quoted string token: `"..."`. #[derive(Debug, Clone, PartialEq)] -#[repr(transparent)] -pub struct StrToken { +pub struct StrData { /// The string inside the quotes. pub string: EcoString, } /// A raw block token: `` `...` ``. #[derive(Debug, Clone, PartialEq)] -pub struct RawToken { +pub struct RawData { /// The raw text in the block. pub text: EcoString, /// The programming language of the raw text. @@ -555,7 +573,7 @@ pub struct RawToken { /// A math formula token: `$2pi + x$` or `$[f'(x) = x^2]$`. #[derive(Debug, Clone, PartialEq)] -pub struct MathToken { +pub struct MathData { /// The formula between the dollars. pub formula: EcoString, /// Whether the formula is display-level, that is, it is surrounded by @@ -565,8 +583,7 @@ pub struct MathToken { /// A unicode escape sequence token: `\u{1F5FA}`. #[derive(Debug, Clone, PartialEq)] -#[repr(transparent)] -pub struct UnicodeEscapeToken { +pub struct UnicodeEscapeData { /// The resulting unicode character. pub character: char, } @@ -712,36 +729,3 @@ impl NodeKind { } } } - -#[macro_export] -macro_rules! 
node { - ($(#[$attr:meta])* $name:ident) => { - node!{$(#[$attr])* $name => $name} - }; - ($(#[$attr:meta])* $variant:ident => $name:ident) => { - #[derive(Debug, Clone, PartialEq)] - #[repr(transparent)] - $(#[$attr])* - pub struct $name(RedNode); - - impl TypedNode for $name { - fn cast_from(node: RedRef) -> Option { - if node.kind() != &NodeKind::$variant { - return None; - } - - Some(Self(node.own())) - } - } - - impl $name { - pub fn span(&self) -> Span { - self.0.span() - } - - pub fn underlying(&self) -> RedRef { - self.0.as_ref() - } - } - }; -} From 42afb27cef5540535420fb6d8d9d2fcda7300a47 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 1 Nov 2021 13:45:33 +0100 Subject: [PATCH 08/18] Add documentation --- src/parse/parser.rs | 29 +++++++++++++++++++++--- src/syntax/mod.rs | 55 ++++++++++++++++++++++++++++++++++++--------- 2 files changed, 71 insertions(+), 13 deletions(-) diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 374e7c09f..8c68d6308 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -112,10 +112,14 @@ impl<'s> Parser<'s> { } } + /// Return the a child from the current stack frame specified by its + /// non-trivia index from the back. pub fn child(&self, child: usize) -> Option<&Green> { self.node_index_from_back(child).map(|i| &self.children[i]) } + /// Map a non-trivia index from the back of the current stack frame to a + /// normal index. fn node_index_from_back(&self, child: usize) -> Option { let len = self.children.len(); let code = self.tokens.mode() == TokenMode::Code; @@ -172,6 +176,8 @@ impl<'s> Parser<'s> { (stack_offset, diff) } + /// Wrap a specified node in the current stack frame (indexed from the back, + /// not including trivia). 
pub fn wrap(&mut self, index: usize, kind: NodeKind) { let index = self.node_index_from_back(index).unwrap(); let child = std::mem::take(&mut self.children[index]); @@ -179,6 +185,7 @@ impl<'s> Parser<'s> { self.children[index] = item.into(); } + /// Eat and wrap the next token. pub fn convert(&mut self, kind: NodeKind) { let len = self.tokens.index() - self.next_start; @@ -194,9 +201,11 @@ impl<'s> Parser<'s> { self.success = true; } - pub fn convert_with(&mut self, preserve: usize, kind: NodeKind) { + /// Wrap the last `amount` children in the current stack frame with a new + /// node. + pub fn convert_with(&mut self, amount: usize, kind: NodeKind) { let preserved: Vec<_> = - self.children.drain(self.children.len() - preserve ..).collect(); + self.children.drain(self.children.len() - amount ..).collect(); let len = preserved.iter().map(|c| c.len()).sum(); self.children .push(GreenNode::with_children(kind, len, preserved).into()); @@ -219,6 +228,8 @@ impl<'s> Parser<'s> { self.success = false; } + /// This function [`Self::lift`]s if the last operation was unsuccessful and + /// returns whether it did. pub fn may_lift_abort(&mut self) -> bool { if !self.success { self.lift(); @@ -229,6 +240,8 @@ impl<'s> Parser<'s> { } } + /// This function [`Self::end`]s if the last operation was unsuccessful and + /// returns whether it did. pub fn may_end_abort(&mut self, kind: NodeKind) -> bool { if !self.success { self.end(kind); @@ -251,6 +264,7 @@ impl<'s> Parser<'s> { } } + /// End the parsing process and return the last child. pub fn finish(&mut self) -> Rc { match self.children.pop().unwrap() { Green::Node(n) => n, @@ -263,6 +277,7 @@ impl<'s> Parser<'s> { self.peek().is_none() } + /// Consume the next token and return its kind. fn eat_peeked(&mut self) -> Option { let token = self.peek()?.clone(); self.eat(); @@ -490,6 +505,8 @@ impl<'s> Parser<'s> { } } + /// Returns whether the given type can be skipped over given the current + /// newline mode. 
pub fn skip_type_ext(token: &NodeKind, stop_at_newline: bool) -> bool { match token { NodeKind::Space(n) => n < &1 || !stop_at_newline, @@ -499,11 +516,12 @@ impl<'s> Parser<'s> { } } + /// Returns whether the given type can be skipped over. fn skip_type(&self, token: &NodeKind) -> bool { Self::skip_type_ext(token, self.stop_at_newline()) } - /// Move to the next token. + /// Consume the next token. pub fn eat(&mut self) { self.children.push( GreenData::new( @@ -516,6 +534,7 @@ impl<'s> Parser<'s> { self.fast_forward(); } + /// Move to the next token. pub fn fast_forward(&mut self) { if !self.next.as_ref().map_or(false, |x| self.skip_type(x)) { self.prev_end = self.tokens.index().into(); @@ -567,20 +586,24 @@ impl<'s> Parser<'s> { self.groups.iter().any(|g| g.kind == kind) } + /// Returns the last child of the current stack frame. pub fn last_child(&self) -> Option<&Green> { self.children.last() } + /// Whether the last operation was successful. pub fn success(&mut self) -> bool { let s = self.success; self.success = true; s } + /// Declare the last operation as unsuccessful. pub fn unsuccessful(&mut self) { self.success = false; } + /// Amount of children in the current stack frame. pub fn child_count(&self) -> usize { self.children.len() } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index ca41d33f7..61e0bb7e3 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -30,6 +30,7 @@ pub enum Green { } impl Green { + /// Returns the metadata of the node. fn data(&self) -> &GreenData { match self { Green::Node(n) => &n.data, @@ -37,18 +38,22 @@ impl Green { } } + /// The type of the node. pub fn kind(&self) -> &NodeKind { self.data().kind() } + /// The length of the node. pub fn len(&self) -> usize { self.data().len() } + /// Whether the node or its children contain an error. pub fn erroneous(&self) -> bool { self.data().erroneous() } + /// The node's children. 
pub fn children(&self) -> &[Green] { match self { Green::Node(n) => &n.children(), @@ -87,23 +92,19 @@ pub struct GreenNode { } impl GreenNode { - pub fn new(kind: NodeKind, len: usize) -> Self { - Self { - data: GreenData::new(kind, len), - children: Vec::new(), - } - } - + /// Creates a new node with the given kind and children. pub fn with_children(kind: NodeKind, len: usize, children: Vec) -> Self { let mut data = GreenData::new(kind, len); data.erroneous |= children.iter().any(|c| c.erroneous()); Self { data, children } } + /// Creates a new node with the given kind and a single child. pub fn with_child(kind: NodeKind, len: usize, child: impl Into) -> Self { Self::with_children(kind, len, vec![child.into()]) } + /// The node's children. pub fn children(&self) -> &[Green] { &self.children } @@ -121,7 +122,7 @@ impl From> for Green { } } -/// Data shared between [`GreenNode`]s and [`GreenToken`]s. +/// Data shared between [`GreenNode`]s and leaf nodes. #[derive(Clone, PartialEq)] pub struct GreenData { /// What kind of node this is (each kind would have its own struct in a @@ -134,18 +135,22 @@ pub struct GreenData { } impl GreenData { + /// Create new node metadata. pub fn new(kind: NodeKind, len: usize) -> Self { Self { len, erroneous: kind.is_error(), kind } } + /// The type of the node. pub fn kind(&self) -> &NodeKind { &self.kind } + /// The length of the node. pub fn len(&self) -> usize { self.len } + /// Whether the node or its children contain an error. pub fn erroneous(&self) -> bool { self.erroneous } @@ -157,6 +162,8 @@ impl From for Green { } } +/// A borrowed wrapper for the [`GreenNode`] type that allows to access spans, +/// error lists and cast to an AST. #[derive(Copy, Clone, PartialEq)] pub struct RedRef<'a> { id: SourceId, @@ -165,6 +172,7 @@ pub struct RedRef<'a> { } impl<'a> RedRef<'a> { + /// Convert to an owned representation. 
pub fn own(self) -> RedNode { RedNode { id: self.id, @@ -173,18 +181,22 @@ impl<'a> RedRef<'a> { } } + /// The type of the node. pub fn kind(&self) -> &NodeKind { self.green.kind() } + /// The span of the node. pub fn span(&self) -> Span { Span::new(self.id, self.offset, self.offset + self.green.len()) } + /// The length of the node. pub fn len(&self) -> usize { self.green.len() } + /// Convert the node to a typed AST node. pub fn cast(self) -> Option where T: TypedNode, @@ -192,10 +204,12 @@ impl<'a> RedRef<'a> { T::cast_from(self) } + /// Whether the node or its children contain an error. pub fn erroneous(&self) -> bool { self.green.erroneous() } + /// The node's children. pub fn children(self) -> impl Iterator> + Clone { let children = match &self.green { Green::Node(node) => node.children(), @@ -210,6 +224,7 @@ impl<'a> RedRef<'a> { }) } + /// The error messages for this node and its descendants. pub fn errors(&self) -> Vec { if !self.green.erroneous() { return vec![]; @@ -233,15 +248,18 @@ impl<'a> RedRef<'a> { } } + /// Get the first child of some type. pub(crate) fn typed_child(&self, kind: &NodeKind) -> Option { self.children() .find(|x| mem::discriminant(x.kind()) == mem::discriminant(kind)) } + /// Get the first child that can cast to some AST type. pub(crate) fn cast_first_child(&self) -> Option { self.children().find_map(RedRef::cast) } + /// Get the last child that can cast to some AST type. pub(crate) fn cast_last_child(&self) -> Option { self.children().filter_map(RedRef::cast).last() } @@ -259,6 +277,8 @@ impl Debug for RedRef<'_> { } } +/// An owned wrapper for the [`GreenNode`] type that allows to access spans, +/// error lists and cast to an AST. #[derive(Clone, PartialEq)] pub struct RedNode { id: SourceId, @@ -267,10 +287,12 @@ pub struct RedNode { } impl RedNode { + /// Create a new root node from a [`GreenNode`]. 
pub fn new_root(root: Rc, id: SourceId) -> Self { Self { id, offset: 0, green: root.into() } } + /// Convert to a borrowed representation. pub fn as_ref<'a>(&'a self) -> RedRef<'a> { RedRef { id: self.id, @@ -279,14 +301,17 @@ impl RedNode { } } + /// The span of the node. pub fn span(&self) -> Span { self.as_ref().span() } + /// The length of the node. pub fn len(&self) -> usize { self.as_ref().len() } + /// Convert the node to a typed AST node. pub fn cast(self) -> Option where T: TypedNode, @@ -294,26 +319,32 @@ impl RedNode { T::cast_from(self.as_ref()) } + /// The type of the node. pub fn kind(&self) -> &NodeKind { self.green.kind() } + /// The children of the node. pub fn children<'a>(&'a self) -> impl Iterator> + Clone { self.as_ref().children() } + /// The error messages for this node and its descendants. pub fn errors<'a>(&'a self) -> Vec { self.as_ref().errors() } + /// Get the first child of some type. pub(crate) fn typed_child(&self, kind: &NodeKind) -> Option { self.as_ref().typed_child(kind).map(RedRef::own) } + /// Get the first child that can cast to some AST type. pub(crate) fn cast_first_child(&self) -> Option { self.as_ref().cast_first_child() } + /// Get the last child that can cast to some AST type. pub(crate) fn cast_last_child(&self) -> Option { self.as_ref().cast_last_child() } @@ -477,7 +508,7 @@ pub enum NodeKind { /// A percentage: `50%`. /// /// _Note_: `50%` is stored as `50.0` here, as in the corresponding - /// [literal](super::Lit::Percent). + /// [literal](Lit::Percent). Percentage(f64), /// A fraction unit: `3fr`. Fraction(f64), @@ -595,6 +626,7 @@ impl Display for NodeKind { } impl NodeKind { + /// Whether this is some kind of parenthesis. pub fn is_paren(&self) -> bool { match self { Self::LeftParen => true, @@ -603,6 +635,7 @@ impl NodeKind { } } + /// Whether this is some kind of bracket. 
pub fn is_bracket(&self) -> bool { match self { Self::LeftBracket => true, @@ -611,6 +644,7 @@ impl NodeKind { } } + /// Whether this is some kind of brace. pub fn is_brace(&self) -> bool { match self { Self::LeftBrace => true, @@ -619,8 +653,9 @@ impl NodeKind { } } + /// Whether this is some kind of error. pub fn is_error(&self) -> bool { - matches!(self, NodeKind::Error(_, _)) + matches!(self, NodeKind::Error(_, _) | NodeKind::Unknown(_)) } pub fn as_str(&self) -> &'static str { From 65fac0e57c9852eb2131aa06c0bac43b70bfbfbc Mon Sep 17 00:00:00 2001 From: Laurenz Date: Tue, 2 Nov 2021 12:13:45 +0100 Subject: [PATCH 09/18] Refactoring Co-Authored-By: Martin --- src/diag.rs | 2 +- src/eval/capture.rs | 3 +- src/eval/mod.rs | 5 +- src/eval/walk.rs | 2 +- src/geom/relative.rs | 2 +- src/lib.rs | 2 +- src/parse/mod.rs | 3 +- src/parse/parser.rs | 8 +- src/parse/scanner.rs | 49 ++++++++--- src/parse/tokens.rs | 108 ++++++++++++------------ src/source.rs | 3 +- src/syntax/ast.rs | 141 +++++++++++++++++++++++-------- src/syntax/ident.rs | 94 --------------------- src/syntax/mod.rs | 197 ++++++++++++++++++++----------------------- src/syntax/pretty.rs | 2 +- src/syntax/span.rs | 130 ++++++---------------------- tests/typeset.rs | 14 +-- 17 files changed, 338 insertions(+), 427 deletions(-) delete mode 100644 src/syntax/ident.rs diff --git a/src/diag.rs b/src/diag.rs index f04553107..d284687ed 100644 --- a/src/diag.rs +++ b/src/diag.rs @@ -100,7 +100,7 @@ impl Trace for TypResult { { self.map_err(|mut errors| { for error in errors.iter_mut() { - if !span.contains(error.span) { + if !span.surrounds(error.span) { error.trace.push(Spanned::new(make_point(), span)); } } diff --git a/src/eval/capture.rs b/src/eval/capture.rs index b71e1ac18..e46103c8b 100644 --- a/src/eval/capture.rs +++ b/src/eval/capture.rs @@ -1,7 +1,8 @@ use std::rc::Rc; use super::{Scope, Scopes, Value}; -use crate::syntax::{ClosureParam, Expr, Imports, RedRef}; +use crate::syntax::ast::{ClosureParam, 
Expr, Imports}; +use crate::syntax::RedRef; /// A visitor that captures variable slots. pub struct CapturesVisitor<'a> { diff --git a/src/eval/mod.rs b/src/eval/mod.rs index ba266ea58..809209f46 100644 --- a/src/eval/mod.rs +++ b/src/eval/mod.rs @@ -36,7 +36,8 @@ use crate::geom::{Angle, Fractional, Length, Relative}; use crate::image::ImageStore; use crate::loading::Loader; use crate::source::{SourceId, SourceStore}; -use crate::syntax::*; +use crate::syntax::ast::*; +use crate::syntax::{Span, Spanned}; use crate::util::RefMutExt; use crate::Context; @@ -238,7 +239,7 @@ impl Eval for DictExpr { fn eval(&self, ctx: &mut EvalContext) -> TypResult { self.items() - .map(|x| Ok(((&x.name().string).into(), x.expr().eval(ctx)?))) + .map(|x| Ok((x.name().string.into(), x.expr().eval(ctx)?))) .collect() } } diff --git a/src/eval/walk.rs b/src/eval/walk.rs index e4f8ac7b8..ff73f9f90 100644 --- a/src/eval/walk.rs +++ b/src/eval/walk.rs @@ -5,7 +5,7 @@ use crate::diag::TypResult; use crate::geom::Spec; use crate::layout::BlockLevel; use crate::library::{GridNode, ParChild, ParNode, TrackSizing}; -use crate::syntax::*; +use crate::syntax::ast::*; use crate::util::BoolExt; /// Walk markup, filling the currently built template. diff --git a/src/geom/relative.rs b/src/geom/relative.rs index c2d0a0cb0..754aa6c85 100644 --- a/src/geom/relative.rs +++ b/src/geom/relative.rs @@ -3,7 +3,7 @@ use super::*; /// A relative length. /// /// _Note_: `50%` is represented as `0.5` here, but stored as `50.0` in the -/// corresponding [literal](crate::syntax::Lit::Percent). +/// corresponding [literal](crate::syntax::ast::Lit::Percent). #[derive(Default, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] pub struct Relative(N64); diff --git a/src/lib.rs b/src/lib.rs index 468c06d8c..033230f0b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,7 +20,7 @@ //! //! [tokens]: parse::Tokens //! [parsed]: parse::parse -//! [markup]: syntax::Markup +//! [markup]: syntax::ast::Markup //! 
[evaluate]: eval::eval //! [module]: eval::Module //! [layout tree]: layout::LayoutTree diff --git a/src/parse/mod.rs b/src/parse/mod.rs index c6def4dcc..bfe938960 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -12,7 +12,8 @@ pub use tokens::*; use std::rc::Rc; -use crate::syntax::*; +use crate::syntax::ast::{Associativity, BinOp, UnOp}; +use crate::syntax::{ErrorPosition, GreenNode, NodeKind}; use crate::util::EcoString; /// Parse a source file. diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 8c68d6308..5833c724a 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,7 +1,7 @@ use std::ops::Range; use std::rc::Rc; -use super::{is_newline, TokenMode, Tokens}; +use super::{TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; @@ -375,11 +375,7 @@ impl<'s> Parser<'s> { /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { - self.src[.. index] - .chars() - .rev() - .take_while(|&c| !is_newline(c)) - .count() + self.tokens.column(index) } /// Slice out part of the source string. diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs index edf28e179..92a2333d4 100644 --- a/src/parse/scanner.rs +++ b/src/parse/scanner.rs @@ -1,5 +1,7 @@ use std::slice::SliceIndex; +use unicode_xid::UnicodeXID; + /// A featureful char-based scanner. #[derive(Copy, Clone)] pub struct Scanner<'s> { @@ -106,16 +108,6 @@ impl<'s> Scanner<'s> { self.index } - /// The column index of a given index in the source string. - #[inline] - pub fn column(&self, index: usize) -> usize { - self.src[.. index] - .chars() - .rev() - .take_while(|&c| !is_newline(c)) - .count() - } - /// Jump to an index in the source string. #[inline] pub fn jump(&mut self, index: usize) { @@ -124,6 +116,12 @@ impl<'s> Scanner<'s> { self.index = index; } + /// The full source string. 
+ #[inline] + pub fn src(&self) -> &'s str { + &self.src + } + /// Slice out part of the source string. #[inline] pub fn get(&self, index: I) -> &'s str @@ -160,6 +158,16 @@ impl<'s> Scanner<'s> { // optimized away in some cases. self.src.get(start .. self.index).unwrap_or_default() } + + /// The column index of a given index in the source string. + #[inline] + pub fn column(&self, index: usize) -> usize { + self.src[.. index] + .chars() + .rev() + .take_while(|&c| !is_newline(c)) + .count() + } } /// Whether this character denotes a newline. @@ -173,3 +181,24 @@ pub fn is_newline(character: char) -> bool { '\u{0085}' | '\u{2028}' | '\u{2029}' ) } + +/// Whether a string is a valid identifier. +#[inline] +pub fn is_ident(string: &str) -> bool { + let mut chars = string.chars(); + chars + .next() + .map_or(false, |c| is_id_start(c) && chars.all(is_id_continue)) +} + +/// Whether a character can start an identifier. +#[inline] +pub fn is_id_start(c: char) -> bool { + c.is_xid_start() || c == '_' +} + +/// Whether a character can continue an identifier. +#[inline] +pub fn is_id_continue(c: char) -> bool { + c.is_xid_continue() || c == '_' || c == '-' +} diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index ef2678d4c..aa28e1f50 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -1,11 +1,13 @@ -use super::{is_newline, resolve_raw, Scanner}; +use std::rc::Rc; + +use super::{ + is_id_continue, is_id_start, is_newline, resolve_hex, resolve_raw, resolve_string, + Scanner, +}; use crate::geom::{AngularUnit, LengthUnit}; -use crate::parse::resolve::{resolve_hex, resolve_string}; use crate::syntax::*; use crate::util::EcoString; -use std::rc::Rc; - /// An iterator over the tokens of a string of source code. pub struct Tokens<'s> { s: Scanner<'s>, @@ -55,6 +57,12 @@ impl<'s> Tokens<'s> { self.s.jump(index); } + /// The column of a given index in the source string. 
+ #[inline] + pub fn column(&self, index: usize) -> usize { + self.s.column(index) + } + /// The underlying scanner. #[inline] pub fn scanner(&self) -> Scanner<'s> { @@ -237,10 +245,8 @@ impl<'s> Tokens<'s> { let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into(); if self.s.eat_if('}') { - if let Some(character) = resolve_hex(&sequence) { - NodeKind::UnicodeEscape(UnicodeEscapeData { - character, - }) + if let Some(c) = resolve_hex(&sequence) { + NodeKind::UnicodeEscape(c) } else { NodeKind::Error( ErrorPosition::Full, @@ -308,7 +314,8 @@ impl<'s> Tokens<'s> { } fn raw(&mut self) -> NodeKind { - let column = self.s.column(self.s.index() - 1); + let column = self.column(self.s.index() - 1); + let mut backticks = 1; while self.s.eat_if('`') && backticks < u8::MAX { backticks += 1; @@ -486,7 +493,7 @@ impl<'s> Tokens<'s> { } })); if self.s.eat_if('"') { - NodeKind::Str(StrData { string }) + NodeKind::Str(string) } else { NodeKind::Error(ErrorPosition::End, "expected quote".into()) } @@ -556,12 +563,13 @@ mod tests { use super::*; + use ErrorPosition::*; use NodeKind::*; use Option::None; use TokenMode::{Code, Markup}; - fn UnicodeEscape(character: char) -> NodeKind { - NodeKind::UnicodeEscape(UnicodeEscapeData { character }) + fn UnicodeEscape(c: char) -> NodeKind { + NodeKind::UnicodeEscape(c) } fn Error(pos: ErrorPosition, message: &str) -> NodeKind { @@ -577,24 +585,12 @@ mod tests { })) } - fn Math(formula: &str, display: bool, err_msg: Option<&str>) -> NodeKind { - match err_msg { - None => { - NodeKind::Math(Rc::new(MathData { formula: formula.into(), display })) - } - Some(msg) => NodeKind::Error( - ErrorPosition::End, - format!("expected closing {}", msg).into(), - ), - } + fn Math(formula: &str, display: bool) -> NodeKind { + NodeKind::Math(Rc::new(MathData { formula: formula.into(), display })) } - fn Str(string: &str, terminated: bool) -> NodeKind { - if terminated { - NodeKind::Str(StrData { string: string.into() }) - } else { - 
NodeKind::Error(ErrorPosition::End, "expected quote".into()) - } + fn Str(string: &str) -> NodeKind { + NodeKind::Str(string.into()) } fn Text(string: &str) -> NodeKind { @@ -659,7 +655,7 @@ mod tests { ('/', None, "//", LineComment), ('/', None, "/**/", BlockComment), ('/', Some(Markup), "*", Strong), - ('/', Some(Markup), "$ $", Math(" ", false, None)), + ('/', Some(Markup), "$ $", Math(" ", false)), ('/', Some(Markup), r"\\", Text("\\")), ('/', Some(Markup), "#let", Let), ('/', Some(Code), "(", LeftParen), @@ -781,16 +777,16 @@ mod tests { t!(Markup[" /"]: r#"\""# => Text(r"\"), Text("\"")); // Test basic unicode escapes. - t!(Markup: r"\u{}" => Error(ErrorPosition::Full, "invalid unicode escape sequence")); + t!(Markup: r"\u{}" => Error(Full, "invalid unicode escape sequence")); t!(Markup: r"\u{2603}" => UnicodeEscape('☃')); - t!(Markup: r"\u{P}" => Error(ErrorPosition::Full, "invalid unicode escape sequence")); + t!(Markup: r"\u{P}" => Error(Full, "invalid unicode escape sequence")); // Test unclosed unicode escapes. - t!(Markup[" /"]: r"\u{" => Error(ErrorPosition::End, "expected closing brace")); - t!(Markup[" /"]: r"\u{1" => Error(ErrorPosition::End, "expected closing brace")); - t!(Markup[" /"]: r"\u{26A4" => Error(ErrorPosition::End, "expected closing brace")); - t!(Markup[" /"]: r"\u{1Q3P" => Error(ErrorPosition::End, "expected closing brace")); - t!(Markup: r"\u{1🏕}" => Error(ErrorPosition::End, "expected closing brace"), Text("🏕"), RightBrace); + t!(Markup[" /"]: r"\u{" => Error(End, "expected closing brace")); + t!(Markup[" /"]: r"\u{1" => Error(End, "expected closing brace")); + t!(Markup[" /"]: r"\u{26A4" => Error(End, "expected closing brace")); + t!(Markup[" /"]: r"\u{1Q3P" => Error(End, "expected closing brace")); + t!(Markup: r"\u{1🏕}" => Error(End, "expected closing brace"), Text("🏕"), RightBrace); } #[test] @@ -882,11 +878,11 @@ mod tests { // Test basic raw block. 
t!(Markup: "``" => Raw("", None, 1, false)); t!(Markup: "`raw`" => Raw("raw", None, 1, false)); - t!(Markup[""]: "`]" => Error(ErrorPosition::End, "expected 1 backtick")); + t!(Markup[""]: "`]" => Error(End, "expected 1 backtick")); // Test special symbols in raw block. t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, false)); - t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, false), Error(ErrorPosition::End, "expected 1 backtick")); + t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, false), Error(End, "expected 1 backtick")); // Test separated closing backticks. t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, false)); @@ -894,28 +890,28 @@ mod tests { // Test more backticks. t!(Markup: "``nope``" => Raw("", None, 1, false), Text("nope"), Raw("", None, 1, false)); t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, false)); - t!(Markup[""]: "`````👩‍🚀````noend" => Error(ErrorPosition::End, "expected 5 backticks")); + t!(Markup[""]: "`````👩‍🚀````noend" => Error(End, "expected 5 backticks")); t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, false), Raw("", None, 1, false)); } #[test] fn test_tokenize_math_formulas() { // Test basic formula. - t!(Markup: "$$" => Math("", false, None)); - t!(Markup: "$x$" => Math("x", false, None)); - t!(Markup: r"$\\$" => Math(r"\\", false, None)); - t!(Markup: "$[x + y]$" => Math("x + y", true, None)); - t!(Markup: r"$[\\]$" => Math(r"\\", true, None)); + t!(Markup: "$$" => Math("", false)); + t!(Markup: "$x$" => Math("x", false)); + t!(Markup: r"$\\$" => Math(r"\\", false)); + t!(Markup: "$[x + y]$" => Math("x + y", true)); + t!(Markup: r"$[\\]$" => Math(r"\\", true)); // Test unterminated. 
- t!(Markup[""]: "$x" => Math("x", false, Some("dollar sign"))); - t!(Markup[""]: "$[x" => Math("x", true, Some("bracket and dollar sign"))); - t!(Markup[""]: "$[x]\n$" => Math("x]\n$", true, Some("bracket and dollar sign"))); + t!(Markup[""]: "$x" => Error(End, "expected closing dollar sign")); + t!(Markup[""]: "$[x" => Error(End, "expected closing bracket and dollar sign")); + t!(Markup[""]: "$[x]\n$" => Error(End, "expected closing bracket and dollar sign")); // Test escape sequences. - t!(Markup: r"$\$x$" => Math(r"\$x", false, None)); - t!(Markup: r"$[\\\]$]$" => Math(r"\\\]$", true, None)); - t!(Markup[""]: r"$[ ]\\$" => Math(r" ]\\$", true, Some("bracket and dollar sign"))); + t!(Markup: r"$\$x$" => Math(r"\$x", false)); + t!(Markup: r"$[\\\]$]$" => Math(r"\\\]$", true)); + t!(Markup[""]: r"$[ ]\\$" => Error(End, "expected closing bracket and dollar sign")); } #[test] @@ -1003,16 +999,16 @@ mod tests { #[test] fn test_tokenize_strings() { // Test basic strings. - t!(Code: "\"hi\"" => Str("hi", true)); - t!(Code: "\"hi\nthere\"" => Str("hi\nthere", true)); - t!(Code: "\"🌎\"" => Str("🌎", true)); + t!(Code: "\"hi\"" => Str("hi")); + t!(Code: "\"hi\nthere\"" => Str("hi\nthere")); + t!(Code: "\"🌎\"" => Str("🌎")); // Test unterminated. - t!(Code[""]: "\"hi" => Str("hi", false)); + t!(Code[""]: "\"hi" => Error(End, "expected quote")); // Test escaped quote. 
- t!(Code: r#""a\"bc""# => Str("a\"bc", true)); - t!(Code[""]: r#""\""# => Str("\"", false)); + t!(Code: r#""a\"bc""# => Str("a\"bc")); + t!(Code[""]: r#""\""# => Error(End, "expected quote")); } #[test] diff --git a/src/source.rs b/src/source.rs index 3b7212514..46d6b84bb 100644 --- a/src/source.rs +++ b/src/source.rs @@ -11,7 +11,8 @@ use serde::{Deserialize, Serialize}; use crate::diag::TypResult; use crate::loading::{FileHash, Loader}; use crate::parse::{is_newline, parse, Scanner}; -use crate::syntax::{GreenNode, Markup, RedNode}; +use crate::syntax::ast::Markup; +use crate::syntax::{GreenNode, RedNode}; use crate::util::PathExt; #[cfg(feature = "codespan-reporting")] diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 6ca271a96..9ad04be58 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -1,7 +1,18 @@ -use super::{Ident, NodeKind, RedNode, RedRef, Span, TypedNode}; +//! A typed layer over the red-green tree. + +use std::ops::Deref; + +use super::{NodeKind, RedNode, RedRef, Span}; use crate::geom::{AngularUnit, LengthUnit}; +use crate::parse::is_ident; use crate::util::EcoString; +/// A typed AST node. +pub trait TypedNode: Sized { + /// Convert from a red node to a typed node. + fn from_red(value: RedRef) -> Option; +} + macro_rules! node { ($(#[$attr:meta])* $name:ident) => { node!{$(#[$attr])* $name => $name} @@ -13,7 +24,7 @@ macro_rules! node { pub struct $name(RedNode); impl TypedNode for $name { - fn cast_from(node: RedRef) -> Option { + fn from_red(node: RedRef) -> Option { if node.kind() != &NodeKind::$variant { return None; } @@ -23,10 +34,12 @@ macro_rules! node { } impl $name { + /// The source code location. pub fn span(&self) -> Span { self.0.span() } + /// The underlying red node. pub fn underlying(&self) -> RedRef { self.0.as_ref() } @@ -40,7 +53,8 @@ node! { } impl Markup { - pub fn nodes<'a>(&'a self) -> impl Iterator + 'a { + /// The markup nodes. 
+ pub fn nodes(&self) -> impl Iterator + '_ { self.0.children().filter_map(RedRef::cast) } } @@ -73,7 +87,7 @@ pub enum MarkupNode { } impl TypedNode for MarkupNode { - fn cast_from(node: RedRef) -> Option { + fn from_red(node: RedRef) -> Option { match node.kind() { NodeKind::Space(_) => Some(MarkupNode::Space), NodeKind::Linebreak => Some(MarkupNode::Linebreak), @@ -81,17 +95,14 @@ impl TypedNode for MarkupNode { NodeKind::Strong => Some(MarkupNode::Strong), NodeKind::Emph => Some(MarkupNode::Emph), NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())), - NodeKind::UnicodeEscape(u) => Some(MarkupNode::Text(u.character.into())), - NodeKind::EnDash => Some(MarkupNode::Text(EcoString::from("\u{2013}"))), - NodeKind::EmDash => Some(MarkupNode::Text(EcoString::from("\u{2014}"))), - NodeKind::NonBreakingSpace => { - Some(MarkupNode::Text(EcoString::from("\u{00A0}"))) - } + NodeKind::UnicodeEscape(c) => Some(MarkupNode::Text((*c).into())), + NodeKind::EnDash => Some(MarkupNode::Text("\u{2013}".into())), + NodeKind::EmDash => Some(MarkupNode::Text("\u{2014}".into())), + NodeKind::NonBreakingSpace => Some(MarkupNode::Text("\u{00A0}".into())), NodeKind::Raw(_) => node.cast().map(MarkupNode::Raw), NodeKind::Heading => node.cast().map(MarkupNode::Heading), NodeKind::List => node.cast().map(MarkupNode::List), NodeKind::Enum => node.cast().map(MarkupNode::Enum), - NodeKind::Error(_, _) => None, _ => node.cast().map(MarkupNode::Expr), } } @@ -111,16 +122,16 @@ pub struct RawNode { } impl TypedNode for RawNode { - fn cast_from(node: RedRef) -> Option { + fn from_red(node: RedRef) -> Option { match node.kind() { NodeKind::Raw(raw) => { - let span = node.span(); - let start = span.start + raw.backticks as usize; + let full = node.span(); + let start = full.start + raw.backticks as usize; Some(Self { block: raw.block, - lang: raw.lang.as_ref().and_then(|x| { - let span = Span::new(span.source, start, start + x.len()); - Ident::new(x, span) + lang: raw.lang.as_ref().and_then(|lang| 
{ + let span = Span::new(full.source, start, start + lang.len()); + Ident::new(lang, span) }), text: raw.text.clone(), }) @@ -272,7 +283,7 @@ impl Expr { } impl TypedNode for Expr { - fn cast_from(node: RedRef) -> Option { + fn from_red(node: RedRef) -> Option { match node.kind() { NodeKind::Ident(_) => node.cast().map(Self::Ident), NodeKind::Array => node.cast().map(Self::Array), @@ -325,7 +336,7 @@ pub enum Lit { } impl TypedNode for Lit { - fn cast_from(node: RedRef) -> Option { + fn from_red(node: RedRef) -> Option { match node.kind() { NodeKind::None => Some(Self::None(node.span())), NodeKind::Auto => Some(Self::Auto(node.span())), @@ -336,13 +347,14 @@ impl TypedNode for Lit { NodeKind::Angle(f, unit) => Some(Self::Angle(node.span(), *f, *unit)), NodeKind::Percentage(f) => Some(Self::Percent(node.span(), *f)), NodeKind::Fraction(f) => Some(Self::Fractional(node.span(), *f)), - NodeKind::Str(s) => Some(Self::Str(node.span(), s.string.clone())), + NodeKind::Str(s) => Some(Self::Str(node.span(), s.clone())), _ => None, } } } impl Lit { + /// The source code location. pub fn span(&self) -> Span { match self { Self::None(span) => *span, @@ -366,7 +378,7 @@ node! { impl ArrayExpr { /// The array items. - pub fn items<'a>(&'a self) -> impl Iterator + 'a { + pub fn items(&self) -> impl Iterator + '_ { self.0.children().filter_map(RedRef::cast) } } @@ -378,7 +390,7 @@ node! { impl DictExpr { /// The named dictionary items. - pub fn items<'a>(&'a self) -> impl Iterator + 'a { + pub fn items(&self) -> impl Iterator + '_ { self.0.children().filter_map(RedRef::cast) } } @@ -439,7 +451,7 @@ node! { impl BlockExpr { /// The list of expressions contained in the block. 
- pub fn exprs<'a>(&'a self) -> impl Iterator + 'a { + pub fn exprs(&self) -> impl Iterator + '_ { self.0.children().filter_map(RedRef::cast) } } @@ -477,7 +489,7 @@ pub enum UnOp { } impl TypedNode for UnOp { - fn cast_from(node: RedRef) -> Option { + fn from_red(node: RedRef) -> Option { Self::from_token(node.kind()) } } @@ -581,7 +593,7 @@ pub enum BinOp { } impl TypedNode for BinOp { - fn cast_from(node: RedRef) -> Option { + fn from_red(node: RedRef) -> Option { Self::from_token(node.kind()) } } @@ -709,7 +721,7 @@ node! { impl CallArgs { /// The positional and named arguments. - pub fn items<'a>(&'a self) -> impl Iterator + 'a { + pub fn items(&self) -> impl Iterator + '_ { self.0.children().filter_map(RedRef::cast) } } @@ -726,7 +738,7 @@ pub enum CallArg { } impl TypedNode for CallArg { - fn cast_from(node: RedRef) -> Option { + fn from_red(node: RedRef) -> Option { match node.kind() { NodeKind::Named => Some(CallArg::Named( node.cast().expect("named call argument is missing name"), @@ -767,7 +779,7 @@ impl ClosureExpr { } /// The parameter bindings. - pub fn params<'a>(&'a self) -> impl Iterator + 'a { + pub fn params(&self) -> impl Iterator + '_ { self.0 .children() .find(|x| x.kind() == &NodeKind::ClosureParams) @@ -805,10 +817,10 @@ pub enum ClosureParam { } impl TypedNode for ClosureParam { - fn cast_from(node: RedRef) -> Option { + fn from_red(node: RedRef) -> Option { match node.kind() { - NodeKind::Ident(i) => { - Some(ClosureParam::Pos(Ident::new(i, node.span()).unwrap())) + NodeKind::Ident(id) => { + Some(ClosureParam::Pos(Ident::new_unchecked(id, node.span()))) } NodeKind::Named => Some(ClosureParam::Named( node.cast().expect("named closure parameter is missing name"), @@ -921,7 +933,7 @@ pub enum Imports { } impl TypedNode for Imports { - fn cast_from(node: RedRef) -> Option { + fn from_red(node: RedRef) -> Option { match node.kind() { NodeKind::Star => Some(Imports::Wildcard), NodeKind::ImportItems => { @@ -1043,14 +1055,75 @@ node! 
{ } impl ForPattern { + /// The key part of the pattern: index for arrays, name for dictionaries. pub fn key(&self) -> Option { - let mut items: Vec<_> = self.0.children().filter_map(RedRef::cast).collect(); - if items.len() > 1 { Some(items.remove(0)) } else { None } + let mut children = self.0.children().filter_map(RedRef::cast); + let key = children.next(); + if children.next().is_some() { key } else { None } } + /// The value part of the pattern. pub fn value(&self) -> Ident { self.0 .cast_last_child() .expect("for-in loop pattern is missing value") } } + +/// An unicode identifier with a few extra permissible characters. +/// +/// In addition to what is specified in the [Unicode Standard][uax31], we allow: +/// - `_` as a starting character, +/// - `_` and `-` as continuing characters. +/// +/// [uax31]: http://www.unicode.org/reports/tr31/ +#[derive(Debug, Clone, PartialEq)] +pub struct Ident { + /// The source code location. + pub span: Span, + /// The identifier string. + pub string: EcoString, +} + +impl Ident { + /// Create a new identifier from a string checking that it is a valid. + pub fn new( + string: impl AsRef + Into, + span: impl Into, + ) -> Option { + is_ident(string.as_ref()) + .then(|| Self { span: span.into(), string: string.into() }) + } + + /// Create a new identifier from a string and a span. + /// + /// The `string` must be a valid identifier. + #[track_caller] + pub fn new_unchecked(string: impl Into, span: Span) -> Self { + let string = string.into(); + debug_assert!(is_ident(&string), "`{}` is not a valid identifier", string); + Self { span, string } + } + + /// Return a reference to the underlying string. 
+ pub fn as_str(&self) -> &str { + &self.string + } +} + +impl Deref for Ident { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +impl TypedNode for Ident { + fn from_red(node: RedRef) -> Option { + match node.kind() { + NodeKind::Ident(string) => Some(Ident::new_unchecked(string, node.span())), + _ => None, + } + } +} diff --git a/src/syntax/ident.rs b/src/syntax/ident.rs deleted file mode 100644 index f5cc63300..000000000 --- a/src/syntax/ident.rs +++ /dev/null @@ -1,94 +0,0 @@ -use std::borrow::Borrow; -use std::ops::Deref; - -use unicode_xid::UnicodeXID; - -use super::{NodeKind, RedRef, Span, TypedNode}; -use crate::util::EcoString; - -/// An unicode identifier with a few extra permissible characters. -/// -/// In addition to what is specified in the [Unicode Standard][uax31], we allow: -/// - `_` as a starting character, -/// - `_` and `-` as continuing characters. -/// -/// [uax31]: http://www.unicode.org/reports/tr31/ -#[derive(Debug, Clone, PartialEq)] -pub struct Ident { - /// The source code location. - pub span: Span, - /// The identifier string. - pub string: EcoString, -} - -impl Ident { - /// Create a new identifier from a string checking that it is a valid. - pub fn new( - string: impl AsRef + Into, - span: impl Into, - ) -> Option { - if is_ident(string.as_ref()) { - Some(Self { span: span.into(), string: string.into() }) - } else { - None - } - } - - /// Return a reference to the underlying string. 
- pub fn as_str(&self) -> &str { - self - } -} - -impl Deref for Ident { - type Target = str; - - fn deref(&self) -> &Self::Target { - self.string.as_str() - } -} - -impl AsRef for Ident { - fn as_ref(&self) -> &str { - self - } -} - -impl Borrow for Ident { - fn borrow(&self) -> &str { - self - } -} - -impl From<&Ident> for EcoString { - fn from(ident: &Ident) -> Self { - ident.string.clone() - } -} - -impl TypedNode for Ident { - fn cast_from(node: RedRef) -> Option { - match node.kind() { - NodeKind::Ident(i) => Some(Ident::new(i, node.span()).unwrap()), - _ => None, - } - } -} - -/// Whether a string is a valid identifier. -pub fn is_ident(string: &str) -> bool { - let mut chars = string.chars(); - chars - .next() - .map_or(false, |c| is_id_start(c) && chars.all(is_id_continue)) -} - -/// Whether a character can start an identifier. -pub fn is_id_start(c: char) -> bool { - c.is_xid_start() || c == '_' -} - -/// Whether a character can continue an identifier. -pub fn is_id_continue(c: char) -> bool { - c.is_xid_continue() || c == '_' || c == '-' -} diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 61e0bb7e3..d26c64849 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -1,31 +1,28 @@ //! Syntax types. -mod ast; -mod ident; +pub mod ast; mod pretty; mod span; -use std::fmt; -use std::fmt::{Debug, Display, Formatter}; +use std::fmt::{self, Debug, Display, Formatter}; use std::mem; use std::rc::Rc; -pub use ast::*; -pub use ident::*; pub use pretty::*; pub use span::*; +use self::ast::TypedNode; use crate::diag::Error; use crate::geom::{AngularUnit, LengthUnit}; use crate::source::SourceId; use crate::util::EcoString; -/// Children of a [`GreenNode`]. +/// An inner of leaf node in the untyped green tree. #[derive(Clone, PartialEq)] pub enum Green { - /// A non-terminal node in an Rc. + /// A reference-counted inner node. Node(Rc), - /// A terminal owned token. + /// A terminal, owned token. 
Token(GreenData), } @@ -77,13 +74,12 @@ impl Debug for Green { f.debug_list().entries(&n.children).finish()?; } } - Ok(()) } } -/// A syntactical node. -#[derive(Clone, PartialEq)] +/// An inner node in the untyped green tree. +#[derive(Debug, Clone, PartialEq)] pub struct GreenNode { /// Node metadata. data: GreenData, @@ -122,15 +118,15 @@ impl From> for Green { } } -/// Data shared between [`GreenNode`]s and leaf nodes. -#[derive(Clone, PartialEq)] +/// Data shared between inner and leaf nodes. +#[derive(Debug, Clone, PartialEq)] pub struct GreenData { /// What kind of node this is (each kind would have its own struct in a /// strongly typed AST). kind: NodeKind, /// The byte length of the node in the source. len: usize, - /// Whether this node or any of its children are erroneous. + /// Whether this node or any of its children contain an error. erroneous: bool, } @@ -162,8 +158,9 @@ impl From for Green { } } -/// A borrowed wrapper for the [`GreenNode`] type that allows to access spans, -/// error lists and cast to an AST. +/// A borrowed wrapper for a [`GreenNode`] with span information. +/// +/// Borrowed variant of [`RedNode`]. Can be [cast](Self::cast) to an AST node. #[derive(Copy, Clone, PartialEq)] pub struct RedRef<'a> { id: SourceId, @@ -182,50 +179,27 @@ impl<'a> RedRef<'a> { } /// The type of the node. - pub fn kind(&self) -> &NodeKind { + pub fn kind(self) -> &'a NodeKind { self.green.kind() } - /// The span of the node. - pub fn span(&self) -> Span { - Span::new(self.id, self.offset, self.offset + self.green.len()) - } - /// The length of the node. - pub fn len(&self) -> usize { + pub fn len(self) -> usize { self.green.len() } - /// Convert the node to a typed AST node. - pub fn cast(self) -> Option - where - T: TypedNode, - { - T::cast_from(self) + /// The span of the node. + pub fn span(self) -> Span { + Span::new(self.id, self.offset, self.offset + self.green.len()) } /// Whether the node or its children contain an error. 
- pub fn erroneous(&self) -> bool { + pub fn erroneous(self) -> bool { self.green.erroneous() } - /// The node's children. - pub fn children(self) -> impl Iterator> + Clone { - let children = match &self.green { - Green::Node(node) => node.children(), - Green::Token(_) => &[], - }; - - let mut offset = self.offset; - children.iter().map(move |green| { - let child_offset = offset; - offset += green.len(); - RedRef { id: self.id, offset: child_offset, green } - }) - } - /// The error messages for this node and its descendants. - pub fn errors(&self) -> Vec { + pub fn errors(self) -> Vec { if !self.green.erroneous() { return vec![]; } @@ -248,19 +222,42 @@ impl<'a> RedRef<'a> { } } + /// Convert the node to a typed AST node. + pub fn cast(self) -> Option + where + T: TypedNode, + { + T::from_red(self) + } + + /// The node's children. + pub fn children(self) -> impl Iterator> { + let children = match &self.green { + Green::Node(node) => node.children(), + Green::Token(_) => &[], + }; + + let mut offset = self.offset; + children.iter().map(move |green| { + let child_offset = offset; + offset += green.len(); + RedRef { id: self.id, offset: child_offset, green } + }) + } + /// Get the first child of some type. - pub(crate) fn typed_child(&self, kind: &NodeKind) -> Option { + pub(crate) fn typed_child(self, kind: &NodeKind) -> Option> { self.children() .find(|x| mem::discriminant(x.kind()) == mem::discriminant(kind)) } /// Get the first child that can cast to some AST type. - pub(crate) fn cast_first_child(&self) -> Option { + pub(crate) fn cast_first_child(self) -> Option { self.children().find_map(RedRef::cast) } /// Get the last child that can cast to some AST type. 
- pub(crate) fn cast_last_child(&self) -> Option { + pub(crate) fn cast_last_child(self) -> Option { self.children().filter_map(RedRef::cast).last() } } @@ -277,8 +274,9 @@ impl Debug for RedRef<'_> { } } -/// An owned wrapper for the [`GreenNode`] type that allows to access spans, -/// error lists and cast to an AST. +/// A owned wrapper for a [`GreenNode`] with span information. +/// +/// Owned variant of [`RedRef`]. Can be [cast](Self::cast) to an AST nodes. #[derive(Clone, PartialEq)] pub struct RedNode { id: SourceId, @@ -293,7 +291,7 @@ impl RedNode { } /// Convert to a borrowed representation. - pub fn as_ref<'a>(&'a self) -> RedRef<'a> { + pub fn as_ref(&self) -> RedRef<'_> { RedRef { id: self.id, offset: self.offset, @@ -301,9 +299,9 @@ impl RedNode { } } - /// The span of the node. - pub fn span(&self) -> Span { - self.as_ref().span() + /// The type of the node. + pub fn kind(&self) -> &NodeKind { + self.as_ref().kind() } /// The length of the node. @@ -311,29 +309,29 @@ impl RedNode { self.as_ref().len() } + /// The span of the node. + pub fn span(&self) -> Span { + self.as_ref().span() + } + + /// The error messages for this node and its descendants. + pub fn errors(&self) -> Vec { + self.as_ref().errors() + } + /// Convert the node to a typed AST node. pub fn cast(self) -> Option where T: TypedNode, { - T::cast_from(self.as_ref()) - } - - /// The type of the node. - pub fn kind(&self) -> &NodeKind { - self.green.kind() + self.as_ref().cast() } /// The children of the node. - pub fn children<'a>(&'a self) -> impl Iterator> + Clone { + pub fn children(&self) -> impl Iterator> { self.as_ref().children() } - /// The error messages for this node and its descendants. - pub fn errors<'a>(&'a self) -> Vec { - self.as_ref().errors() - } - /// Get the first child of some type. 
pub(crate) fn typed_child(&self, kind: &NodeKind) -> Option { self.as_ref().typed_child(kind).map(RedRef::own) @@ -356,11 +354,10 @@ impl Debug for RedNode { } } -pub trait TypedNode: Sized { - /// Performs the conversion. - fn cast_from(value: RedRef) -> Option; -} - +/// All syntactical building blocks that can be part of a Typst document. +/// +/// Can be emitted as a token by the tokenizer or as part of a green node by +/// the parser. #[derive(Debug, Clone, PartialEq)] pub enum NodeKind { /// A left square bracket: `[`. @@ -469,7 +466,7 @@ pub enum NodeKind { EmDash, /// A slash and the letter "u" followed by a hexadecimal unicode entity /// enclosed in curly braces: `\u{1F5FA}`. - UnicodeEscape(UnicodeEscapeData), + UnicodeEscape(char), /// Strong text was enabled / disabled: `*`. Strong, /// Emphasized text was enabled / disabled: `_`. @@ -508,12 +505,12 @@ pub enum NodeKind { /// A percentage: `50%`. /// /// _Note_: `50%` is stored as `50.0` here, as in the corresponding - /// [literal](Lit::Percent). + /// [literal](ast::Lit::Percent). Percentage(f64), /// A fraction unit: `3fr`. Fraction(f64), /// A quoted string: `"..."`. - Str(StrData), + Str(EcoString), /// An array expression: `(1, "hi", 12cm)`. Array, /// A dictionary expression: `(thickness: 3pt, pattern: dashed)`. @@ -572,24 +569,7 @@ pub enum NodeKind { Unknown(EcoString), } -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum ErrorPosition { - /// At the start of the node. - Start, - /// Over the full width of the node. - Full, - /// At the end of the node. - End, -} - -/// A quoted string token: `"..."`. -#[derive(Debug, Clone, PartialEq)] -pub struct StrData { - /// The string inside the quotes. - pub string: EcoString, -} - -/// A raw block token: `` `...` ``. +/// Payload of a raw block: `` `...` ``. #[derive(Debug, Clone, PartialEq)] pub struct RawData { /// The raw text in the block. 
@@ -602,7 +582,7 @@ pub struct RawData { pub block: bool, } -/// A math formula token: `$2pi + x$` or `$[f'(x) = x^2]$`. +/// Payload of a math formula: `$2pi + x$` or `$[f'(x) = x^2]$`. #[derive(Debug, Clone, PartialEq)] pub struct MathData { /// The formula between the dollars. @@ -612,17 +592,15 @@ pub struct MathData { pub display: bool, } -/// A unicode escape sequence token: `\u{1F5FA}`. -#[derive(Debug, Clone, PartialEq)] -pub struct UnicodeEscapeData { - /// The resulting unicode character. - pub character: char, -} - -impl Display for NodeKind { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.pad(self.as_str()) - } +/// Where in a node an error should be annotated. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ErrorPosition { + /// At the start of the node. + Start, + /// Over the full width of the node. + Full, + /// At the end of the node. + End, } impl NodeKind { @@ -658,6 +636,7 @@ impl NodeKind { matches!(self, NodeKind::Error(_, _) | NodeKind::Unknown(_)) } + /// A human-readable name for the kind. pub fn as_str(&self) -> &'static str { match self { Self::LeftBracket => "opening bracket", @@ -764,3 +743,9 @@ impl NodeKind { } } } + +impl Display for NodeKind { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.pad(self.as_str()) + } +} diff --git a/src/syntax/pretty.rs b/src/syntax/pretty.rs index da0bdd443..b396a39c7 100644 --- a/src/syntax/pretty.rs +++ b/src/syntax/pretty.rs @@ -2,7 +2,7 @@ use std::fmt::{self, Arguments, Write}; -use super::*; +use super::ast::*; /// Pretty print an item and return the resulting string. 
pub fn pretty(item: &T) -> String diff --git a/src/syntax/span.rs b/src/syntax/span.rs index ee7cba4c2..c26011bdb 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -1,6 +1,6 @@ use std::cmp::Ordering; use std::fmt::{self, Debug, Formatter}; -use std::ops::{Add, Range}; +use std::ops::Range; use serde::{Deserialize, Serialize}; @@ -53,23 +53,19 @@ pub struct Span { /// The id of the source file. pub source: SourceId, /// The inclusive start position. - pub start: Pos, + pub start: usize, /// The inclusive end position. - pub end: Pos, + pub end: usize, } impl Span { /// Create a new span from start and end positions. - pub fn new(source: SourceId, start: impl Into, end: impl Into) -> Self { - Self { - source, - start: start.into(), - end: end.into(), - } + pub fn new(source: SourceId, start: usize, end: usize) -> Self { + Self { source, start, end } } /// Create a span including just a single position. - pub fn at(source: SourceId, pos: impl Into + Copy) -> Self { + pub fn at(source: SourceId, pos: usize) -> Self { Self::new(source, pos, pos) } @@ -77,19 +73,29 @@ impl Span { pub fn detached() -> Self { Self { source: SourceId::from_raw(0), - start: Pos::ZERO, - end: Pos::ZERO, + start: 0, + end: 0, } } /// Create a span with a different start position. - pub fn with_start(self, start: impl Into) -> Self { - Self { start: start.into(), ..self } + pub fn with_start(self, start: usize) -> Self { + Self { start, ..self } } /// Create a span with a different end position. - pub fn with_end(self, end: impl Into) -> Self { - Self { end: end.into(), ..self } + pub fn with_end(self, end: usize) -> Self { + Self { end, ..self } + } + + /// A new span at the position of this span's start. + pub fn at_start(&self) -> Span { + Self::at(self.source, self.start) + } + + /// A new span at the position of this span's end. + pub fn at_end(&self) -> Span { + Self::at(self.source, self.end) } /// Create a new span with the earlier start and later end position. 
@@ -110,28 +116,18 @@ impl Span { } /// Test whether a position is within the span. - pub fn contains_pos(&self, pos: Pos) -> bool { + pub fn contains(&self, pos: usize) -> bool { self.start <= pos && self.end >= pos } /// Test whether one span complete contains the other span. - pub fn contains(self, other: Self) -> bool { + pub fn surrounds(self, other: Self) -> bool { self.source == other.source && self.start <= other.start && self.end >= other.end } - /// Convert to a `Range` for indexing. + /// Convert to a `Range` for indexing. pub fn to_range(self) -> Range { - self.start.to_usize() .. self.end.to_usize() - } - - /// A new span at the position of this span's start. - pub fn at_start(&self) -> Span { - Self::at(self.source, self.start) - } - - /// A new span at the position of this span's end. - pub fn at_end(&self) -> Span { - Self::at(self.source, self.end) + self.start .. self.end } } @@ -150,77 +146,3 @@ impl PartialOrd for Span { } } } - -/// A byte position in source code. -#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)] -pub struct Pos(pub u32); - -impl Pos { - /// The zero position. - pub const ZERO: Self = Self(0); - - /// Convert to a usize for indexing. - pub fn to_usize(self) -> usize { - self.0 as usize - } -} - -impl Debug for Pos { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - Debug::fmt(&self.0, f) - } -} - -impl From for Pos { - fn from(index: u32) -> Self { - Self(index) - } -} - -impl From for Pos { - fn from(index: usize) -> Self { - Self(index as u32) - } -} - -impl Add for Pos -where - T: Into, -{ - type Output = Self; - - fn add(self, rhs: T) -> Self { - Pos(self.0 + rhs.into().0) - } -} - -/// Convert a position or range into a span. -pub trait IntoSpan { - /// Convert into a span by providing the source id. 
- fn into_span(self, source: SourceId) -> Span; -} - -impl IntoSpan for Span { - fn into_span(self, source: SourceId) -> Span { - debug_assert_eq!(self.source, source); - self - } -} - -impl IntoSpan for Pos { - fn into_span(self, source: SourceId) -> Span { - Span::new(source, self, self) - } -} - -impl IntoSpan for usize { - fn into_span(self, source: SourceId) -> Span { - Span::new(source, self, self) - } -} - -impl IntoSpan for Range { - fn into_span(self, source: SourceId) -> Span { - Span::new(source, self.start, self.end) - } -} diff --git a/tests/typeset.rs b/tests/typeset.rs index bde383c40..68e56343c 100644 --- a/tests/typeset.rs +++ b/tests/typeset.rs @@ -24,7 +24,7 @@ use typst::loading::FsLoader; use typst::parse::Scanner; use typst::source::SourceFile; use typst::style::Style; -use typst::syntax::{Pos, Span}; +use typst::syntax::Span; use typst::Context; const TYP_DIR: &str = "./typ"; @@ -355,12 +355,12 @@ fn parse_metadata(source: &SourceFile) -> (Option, Vec) { let comments = lines[i ..].iter().take_while(|line| line.starts_with("//")).count(); - let pos = |s: &mut Scanner| -> Pos { + let pos = |s: &mut Scanner| -> usize { let first = num(s) - 1; let (delta, column) = if s.eat_if(':') { (first, num(s) - 1) } else { (0, first) }; let line = (i + comments) + delta; - source.line_column_to_byte(line, column).unwrap().into() + source.line_column_to_byte(line, column).unwrap() }; let mut s = Scanner::new(rest); @@ -375,10 +375,10 @@ fn parse_metadata(source: &SourceFile) -> (Option, Vec) { } fn print_error(source: &SourceFile, line: usize, error: &Error) { - let start_line = 1 + line + source.byte_to_line(error.span.start.to_usize()).unwrap(); - let start_col = 1 + source.byte_to_column(error.span.start.to_usize()).unwrap(); - let end_line = 1 + line + source.byte_to_line(error.span.end.to_usize()).unwrap(); - let end_col = 1 + source.byte_to_column(error.span.end.to_usize()).unwrap(); + let start_line = 1 + line + 
source.byte_to_line(error.span.start).unwrap(); + let start_col = 1 + source.byte_to_column(error.span.start).unwrap(); + let end_line = 1 + line + source.byte_to_line(error.span.end).unwrap(); + let end_col = 1 + source.byte_to_column(error.span.end).unwrap(); println!( "Error: {}:{}-{}:{}: {}", start_line, start_col, end_line, end_col, error.message From f0c9635db5efd0c66e01bef1be0a8f140fdbdd84 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Thu, 4 Nov 2021 15:16:46 +0100 Subject: [PATCH 10/18] Notes --- src/parse/mod.rs | 11 +++++++++++ src/parse/parser.rs | 43 ++++++++++++++++++++----------------------- src/syntax/mod.rs | 14 ++++++++++++++ 3 files changed, 45 insertions(+), 23 deletions(-) diff --git a/src/parse/mod.rs b/src/parse/mod.rs index bfe938960..30e20c0db 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -53,6 +53,8 @@ where p.start(); while !p.eof() && f(p) { markup_node(p, &mut at_start); + // NOTE: Just do this at the end of markup_node. Maybe even gives a + // speed boost. Wasn't possible in old parser due to use of ?. if let Some(node) = p.last_child() { at_start &= matches!(node.kind(), &NodeKind::Space(_) | &NodeKind::Parbreak | @@ -115,6 +117,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { let group = if stmt { Group::Stmt } else { Group::Expr }; p.start_group(group, TokenMode::Code); + // NOTE: Return success from expr_with? expr_with(p, true, 0); if stmt && p.success() && !p.eof() { p.expected_at("semicolon or line break"); @@ -138,6 +141,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { /// Parse a heading. fn heading(p: &mut Parser) { + // NOTE: Remove HeadingLevel kind and simply count Eq children in AST. p.start(); p.start(); p.eat_assert(&NodeKind::Eq); @@ -198,6 +202,8 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) { let prec = op.precedence(); expr_with(p, atomic, prec); + // NOTE: Lifting not needed if we don't start in the first place. 
+ // Then we could simply do expr_with(p, atomic, prec)?; if p.may_lift_abort() { return; } @@ -264,6 +270,10 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) { break; } + // NOTE: All lifts up to here wouldn't be needed. + // Only here we then need to do + // marker.end(p, NodeKind::Binary); + offset = p.end_and_start_with(NodeKind::Binary).0; } } @@ -456,6 +466,7 @@ fn item(p: &mut Parser) -> NodeKind { if p.eat_if(&NodeKind::Dots) { expr(p); + // NOTE: Should be called `Spread`. p.end_or_abort(NodeKind::ParameterSink); return NodeKind::ParameterSink; } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 5833c724a..5ecb6e9dc 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -187,17 +187,8 @@ impl<'s> Parser<'s> { /// Eat and wrap the next token. pub fn convert(&mut self, kind: NodeKind) { - let len = self.tokens.index() - self.next_start; - - self.children.push( - GreenNode::with_child( - kind, - len, - GreenData::new(self.next.clone().unwrap(), len), - ) - .into(), - ); - self.fast_forward(); + self.eat(); + self.children.last_mut().unwrap().set_kind(kind); self.success = true; } @@ -278,6 +269,7 @@ impl<'s> Parser<'s> { } /// Consume the next token and return its kind. + // NOTE: This isn't great. fn eat_peeked(&mut self) -> Option { let token = self.peek()?.clone(); self.eat(); @@ -319,6 +311,7 @@ impl<'s> Parser<'s> { /// Consume the next token, debug-asserting that it is one of the given ones. pub fn eat_assert(&mut self, t: &NodeKind) { + // NOTE: assert with peek(), then eat() let next = self.eat_peeked(); debug_assert_eq!(next.as_ref(), Some(t)); } @@ -438,8 +431,6 @@ impl<'s> Parser<'s> { // Rescan the peeked token if the mode changed. if rescan { - self.tokens.jump(self.prev_end()); - if prev_mode == TokenMode::Code { let len = self.children.len(); for n in (0 .. 
len).rev() { @@ -451,7 +442,11 @@ impl<'s> Parser<'s> { } } - self.fast_forward(); + self.tokens.jump(self.prev_end()); + self.prev_end = self.tokens.index().into(); + self.next_start = self.tokens.index().into(); + self.next = self.tokens.next(); + self.repeek(); } } @@ -527,21 +522,23 @@ impl<'s> Parser<'s> { .into(), ); - self.fast_forward(); - } - - /// Move to the next token. - pub fn fast_forward(&mut self) { - if !self.next.as_ref().map_or(false, |x| self.skip_type(x)) { - self.prev_end = self.tokens.index().into(); - } + self.prev_end = self.tokens.index().into(); self.next_start = self.tokens.index().into(); self.next = self.tokens.next(); if self.tokens.mode() == TokenMode::Code { // Skip whitespace and comments. while self.next.as_ref().map_or(false, |x| self.skip_type(x)) { - self.eat(); + self.children.push( + GreenData::new( + self.next.clone().unwrap(), + self.tokens.index() - self.next_start, + ) + .into(), + ); + + self.next_start = self.tokens.index().into(); + self.next = self.tokens.next(); } } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index d26c64849..112fc220f 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -40,6 +40,15 @@ impl Green { self.data().kind() } + /// Set the type of the node. + pub fn set_kind(&mut self, kind: NodeKind) { + let data = match self { + Self::Node(node) => &mut Rc::make_mut(node).data, + Self::Token(data) => data, + }; + data.set_kind(kind); + } + /// The length of the node. pub fn len(&self) -> usize { self.data().len() @@ -141,6 +150,11 @@ impl GreenData { &self.kind } + /// Set the type of the node. + pub fn set_kind(&mut self, kind: NodeKind) { + self.kind = kind; + } + /// The length of the node. 
pub fn len(&self) -> usize { self.len From 5c952d56d0d602a1dbcf85210ae30fa402219fca Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Thu, 4 Nov 2021 19:36:32 +0100 Subject: [PATCH 11/18] New error handling --- src/parse/mod.rs | 514 ++++++++++++++++++----------------------- src/parse/parser.rs | 228 ++++++++---------- src/syntax/ast.rs | 12 +- src/syntax/mod.rs | 7 +- tests/typ/code/let.typ | 2 +- 5 files changed, 325 insertions(+), 438 deletions(-) diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 30e20c0db..92220eaab 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -16,6 +16,8 @@ use crate::syntax::ast::{Associativity, BinOp, UnOp}; use crate::syntax::{ErrorPosition, GreenNode, NodeKind}; use crate::util::EcoString; +type ParseResult = Result<(), ()>; + /// Parse a source file. pub fn parse(source: &str) -> Rc { let mut p = Parser::new(source); @@ -53,24 +55,16 @@ where p.start(); while !p.eof() && f(p) { markup_node(p, &mut at_start); - // NOTE: Just do this at the end of markup_node. Maybe even gives a - // speed boost. Wasn't possible in old parser due to use of ?. - if let Some(node) = p.last_child() { - at_start &= matches!(node.kind(), - &NodeKind::Space(_) | &NodeKind::Parbreak | - &NodeKind::LineComment | &NodeKind::BlockComment - ); - } } p.end(NodeKind::Markup); } /// Parse a markup node. -fn markup_node(p: &mut Parser, at_start: &mut bool) { +fn markup_node(p: &mut Parser, at_start: &mut bool) -> ParseResult { let token = match p.peek() { Some(t) => t, - None => return, + None => return Ok(()), }; match token { @@ -83,6 +77,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { } else { p.convert(NodeKind::Parbreak); } + return Ok(()); } // Text and markup. 
@@ -94,7 +89,10 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { | NodeKind::Strong | NodeKind::Linebreak | NodeKind::Raw(_) - | NodeKind::UnicodeEscape(_) => p.eat(), + | NodeKind::UnicodeEscape(_) => { + p.eat(); + Ok(()) + } NodeKind::Eq if *at_start => heading(p), NodeKind::ListBullet if *at_start => list_node(p), @@ -102,7 +100,8 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { // Line-based markup that is not currently at the start of the line. NodeKind::Eq | NodeKind::ListBullet | NodeKind::EnumNumbering(_) => { - p.convert(NodeKind::Text(p.peek_src().into())) + p.convert(NodeKind::Text(p.peek_src().into())); + Ok(()) } // Hashtag + keyword / identifier. @@ -117,12 +116,11 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { let group = if stmt { Group::Stmt } else { Group::Expr }; p.start_group(group, TokenMode::Code); - // NOTE: Return success from expr_with? - expr_with(p, true, 0); - if stmt && p.success() && !p.eof() { + let res = expr_with(p, true, 0); + if stmt && res.is_ok() && !p.eof() { p.expected_at("semicolon or line break"); } - p.end_group(); + p.end_group() } // Block and template. @@ -130,19 +128,28 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { NodeKind::LeftBracket => template(p), // Comments. - NodeKind::LineComment | NodeKind::BlockComment | NodeKind::Error(_, _) => p.eat(), + NodeKind::LineComment | NodeKind::BlockComment => { + p.eat(); + return Ok(()); + } + + NodeKind::Error(_, _) => { + p.eat(); + Ok(()) + } _ => { - *at_start = false; p.unexpected(); + Err(()) } - }; + }?; + + *at_start = false; + Ok(()) } /// Parse a heading. -fn heading(p: &mut Parser) { - // NOTE: Remove HeadingLevel kind and simply count Eq children in AST. 
- p.start(); +fn heading(p: &mut Parser) -> ParseResult { p.start(); p.eat_assert(&NodeKind::Eq); @@ -153,36 +160,37 @@ fn heading(p: &mut Parser) { } if level > 6 { - p.lift(); p.end(NodeKind::Text(EcoString::from('=').repeat(level))); } else { - p.end(NodeKind::HeadingLevel(level as u8)); let column = p.column(p.prev_end()); markup_indented(p, column); p.end(NodeKind::Heading); } + Ok(()) } /// Parse a single list item. -fn list_node(p: &mut Parser) { +fn list_node(p: &mut Parser) -> ParseResult { p.start(); p.eat_assert(&NodeKind::ListBullet); let column = p.column(p.prev_end()); markup_indented(p, column); p.end(NodeKind::List); + Ok(()) } /// Parse a single enum item. -fn enum_node(p: &mut Parser) { +fn enum_node(p: &mut Parser) -> ParseResult { p.start(); p.eat(); let column = p.column(p.prev_end()); markup_indented(p, column); p.end(NodeKind::Enum); + Ok(()) } /// Parse an expression. -fn expr(p: &mut Parser) { +fn expr(p: &mut Parser) -> ParseResult { expr_with(p, false, 0) } @@ -193,28 +201,19 @@ fn expr(p: &mut Parser) { /// in markup. /// /// Stops parsing at operations with lower precedence than `min_prec`, -fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) { - p.start(); - let mut offset = p.child_count(); +fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { + let marker = p.marker(); + // Start the unary expression. match p.eat_map(|x| UnOp::from_token(&x)) { Some(op) => { let prec = op.precedence(); - expr_with(p, atomic, prec); + expr_with(p, atomic, prec)?; - // NOTE: Lifting not needed if we don't start in the first place. 
- // Then we could simply do expr_with(p, atomic, prec)?; - if p.may_lift_abort() { - return; - } - - p.end_and_start_with(NodeKind::Unary); + marker.end(p, NodeKind::Unary); } None => { - primary(p, atomic); - if p.may_lift_abort() { - return; - } + primary(p, atomic)?; } }; @@ -225,35 +224,28 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) { p.peek_direct(), Some(NodeKind::LeftParen | NodeKind::LeftBracket) ) { - call(p, p.child_count() - offset); + call(p, &marker); continue; } - if p.peek() == Some(&NodeKind::With) { - with_expr(p, p.child_count() - offset); - - if p.may_lift_abort() { - return; - } + if atomic { + break Ok(()); } - if atomic { - p.lift(); - break; + if p.peek() == Some(&NodeKind::With) { + with_expr(p, &marker)?; } let op = match p.peek().and_then(BinOp::from_token) { Some(binop) => binop, None => { - p.lift(); - break; + break Ok(()); } }; let mut prec = op.precedence(); if prec < min_prec { - p.lift(); - break; + break Ok(()); } p.eat(); @@ -263,44 +255,38 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) { Associativity::Right => {} } - expr_with(p, atomic, prec); - - if !p.success() { - p.lift(); - break; + if expr_with(p, atomic, prec).is_err() { + break Ok(()); } - // NOTE: All lifts up to here wouldn't be needed. - // Only here we then need to do - // marker.end(p, NodeKind::Binary); - - offset = p.end_and_start_with(NodeKind::Binary).0; + marker.end(p, NodeKind::Binary); } } /// Parse a primary expression. -fn primary(p: &mut Parser, atomic: bool) { - if literal(p) { - return; +fn primary(p: &mut Parser, atomic: bool) -> ParseResult { + let lit = literal(p); + if lit.is_ok() { + return lit; } match p.peek() { // Things that start with an identifier. Some(NodeKind::Ident(_)) => { // Start closure params. - p.start(); + let marker = p.marker(); p.eat(); // Arrow means this is a closure's lone parameter. 
if !atomic && p.peek() == Some(&NodeKind::Arrow) { - p.end_and_start_with(NodeKind::ClosureParams); + marker.end(p, NodeKind::ClosureParams); p.eat(); - expr(p); - - p.end_or_abort(NodeKind::Closure); + let e = expr(p); + marker.end(p, NodeKind::Closure); + e } else { - p.lift(); + Ok(()) } } @@ -319,18 +305,19 @@ fn primary(p: &mut Parser, atomic: bool) { Some(NodeKind::Error(_, _)) => { p.eat(); + Ok(()) } // Nothing. _ => { p.expected("expression"); - p.unsuccessful(); + Err(()) } } } /// Parse a literal. -fn literal(p: &mut Parser) -> bool { +fn literal(p: &mut Parser) -> ParseResult { match p.peek() { // Basic values. Some( @@ -346,10 +333,10 @@ fn literal(p: &mut Parser) -> bool { | NodeKind::Str(_), ) => { p.eat(); - true + Ok(()) } - _ => false, + _ => Err(()), } } @@ -358,47 +345,39 @@ fn literal(p: &mut Parser) -> bool { /// - Dictionary literal /// - Parenthesized expression /// - Parameter list of closure expression -fn parenthesized(p: &mut Parser) { - let offset = p.child_count(); - p.start(); +fn parenthesized(p: &mut Parser) -> ParseResult { + let marker = p.marker(); p.start_group(Group::Paren, TokenMode::Code); let colon = p.eat_if(&NodeKind::Colon); let kind = collection(p).0; p.end_group(); - let token_count = p.child_count() - offset; // Leading colon makes this a (empty) dictionary. if colon { - p.lift(); - dict(p, token_count); - return; + return dict(p, &marker); } // Arrow means this is a closure's parameter list. if p.peek() == Some(&NodeKind::Arrow) { - p.start_with(token_count); - params(p, 0, true); - p.end(NodeKind::ClosureParams); + params(p, &marker, true); + marker.end(p, NodeKind::ClosureParams); p.eat_assert(&NodeKind::Arrow); - expr(p); + let r = expr(p); - p.end_or_abort(NodeKind::Closure); - return; + marker.end(p, NodeKind::Closure); + return r; } // Find out which kind of collection this is. 
match kind { - CollectionKind::Group => p.end(NodeKind::Group), - CollectionKind::Positional => { - p.lift(); - array(p, token_count); - } - CollectionKind::Named => { - p.lift(); - dict(p, token_count); + CollectionKind::Group => { + marker.end(p, NodeKind::Group); + Ok(()) } + CollectionKind::Positional => array(p, &marker), + CollectionKind::Named => dict(p, &marker), } } @@ -422,23 +401,22 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { let mut items = 0; let mut kind = CollectionKind::Positional; let mut has_comma = false; - let mut missing_coma = None; + let mut missing_coma: Option = None; while !p.eof() { - let item_kind = item(p); - if p.success() { + if let Ok(item_kind) = item(p) { if items == 0 && item_kind == NodeKind::Named { kind = CollectionKind::Named; } - if item_kind == NodeKind::ParameterSink { + if item_kind == NodeKind::Spread { has_comma = true; } items += 1; - if let Some(pos) = missing_coma.take() { - p.expected_at_child(pos, "comma"); + if let Some(marker) = missing_coma.take() { + marker.expected_at(p, "comma"); } if p.eof() { @@ -448,7 +426,7 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { if p.eat_if(&NodeKind::Comma) { has_comma = true; } else { - missing_coma = Some(p.child_count()); + missing_coma = Some(p.marker()); } } } @@ -461,52 +439,49 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { } /// Parse an expression or a named pair. Returns if this is a named pair. -fn item(p: &mut Parser) -> NodeKind { - p.start(); +fn item(p: &mut Parser) -> Result { + let marker = p.marker(); if p.eat_if(&NodeKind::Dots) { - expr(p); + let r = expr(p); - // NOTE: Should be called `Spread`. 
- p.end_or_abort(NodeKind::ParameterSink); - return NodeKind::ParameterSink; + marker.end(p, NodeKind::Spread); + return r.map(|_| NodeKind::Spread); } - expr(p); - - if p.may_lift_abort() { - return NodeKind::None; + let ident_marker = p.marker(); + if expr(p).is_err() { + return Err(()); } - if p.eat_if(&NodeKind::Colon) { - let child = p.child(1).unwrap(); - if matches!(child.kind(), &NodeKind::Ident(_)) { - expr(p); - p.end_or_abort(NodeKind::Named); + if p.peek() == Some(&NodeKind::Colon) { + let r = if matches!(p.child(0).unwrap().kind(), &NodeKind::Ident(_)) { + p.eat(); + expr(p) } else { - p.wrap( - 1, + ident_marker.end( + p, NodeKind::Error(ErrorPosition::Full, "expected identifier".into()), ); + p.eat(); expr(p); - p.end(NodeKind::Named); - p.unsuccessful(); - } + Err(()) + }; - NodeKind::Named + marker.end(p, NodeKind::Named); + r.map(|_| NodeKind::Named) } else { - p.lift(); - p.last_child().unwrap().kind().clone() + Ok(p.last_child().unwrap().kind().clone()) } } /// Convert a collection into an array, producing errors for anything other than /// expressions. -fn array(p: &mut Parser, items: usize) { - p.filter_children( - p.child_count() - items, +fn array(p: &mut Parser, marker: &Marker) -> ParseResult { + marker.filter_children( + p, |x| match x.kind() { - NodeKind::Named | NodeKind::ParameterSink => false, + NodeKind::Named | NodeKind::Spread => false, _ => true, }, |kind| match kind { @@ -514,21 +489,22 @@ fn array(p: &mut Parser, items: usize) { ErrorPosition::Full, "expected expression, found named pair".into(), ), - NodeKind::ParameterSink => { + NodeKind::Spread => { (ErrorPosition::Full, "spreading is not allowed here".into()) } _ => unreachable!(), }, ); - p.convert_with(items, NodeKind::Array); + marker.end(p, NodeKind::Array); + Ok(()) } /// Convert a collection into a dictionary, producing errors for anything other /// than named pairs. 
-fn dict(p: &mut Parser, items: usize) { - p.filter_children( - p.child_count() - items, +fn dict(p: &mut Parser, marker: &Marker) -> ParseResult { + marker.filter_children( + p, |x| { x.kind() == &NodeKind::Named || x.kind().is_paren() @@ -536,7 +512,7 @@ fn dict(p: &mut Parser, items: usize) { || x.kind() == &NodeKind::Colon }, |kind| match kind { - NodeKind::ParameterSink => { + NodeKind::Spread => { (ErrorPosition::Full, "spreading is not allowed here".into()) } _ => ( @@ -545,17 +521,19 @@ fn dict(p: &mut Parser, items: usize) { ), }, ); - p.convert_with(items, NodeKind::Dict); + + marker.end(p, NodeKind::Dict); + Ok(()) } /// Convert a collection into a list of parameters, producing errors for /// anything other than identifiers, spread operations and named pairs. -fn params(p: &mut Parser, count: usize, allow_parens: bool) { - p.filter_children( - count, +fn params(p: &mut Parser, marker: &Marker, allow_parens: bool) { + marker.filter_children( + p, |x| match x.kind() { NodeKind::Named | NodeKind::Comma | NodeKind::Ident(_) => true, - NodeKind::ParameterSink => matches!( + NodeKind::Spread => matches!( x.children().last().map(|x| x.kind()), Some(&NodeKind::Ident(_)) ), @@ -567,22 +545,22 @@ fn params(p: &mut Parser, count: usize, allow_parens: bool) { } // Parse a template block: `[...]`. -fn template(p: &mut Parser) { +fn template(p: &mut Parser) -> ParseResult { p.start(); p.start_group(Group::Bracket, TokenMode::Markup); markup(p); p.end_group(); p.end(NodeKind::Template); + Ok(()) } /// Parse a code block: `{...}`. -fn block(p: &mut Parser) { +fn block(p: &mut Parser) -> ParseResult { p.start(); p.start_group(Group::Brace, TokenMode::Code); while !p.eof() { p.start_group(Group::Stmt, TokenMode::Code); - expr(p); - if p.success() { + if expr(p).is_ok() { if !p.eof() { p.expected_at("semicolon or line break"); } @@ -594,25 +572,25 @@ fn block(p: &mut Parser) { } p.end_group(); p.end(NodeKind::Block); + Ok(()) } /// Parse a function call. 
-fn call(p: &mut Parser, callee: usize) { - p.start_with(callee); - match p.peek_direct() { +fn call(p: &mut Parser, callee: &Marker) -> ParseResult { + let res = match p.peek_direct() { Some(NodeKind::LeftParen) | Some(NodeKind::LeftBracket) => args(p, true), _ => { p.expected_at("argument list"); - p.may_end_abort(NodeKind::Call); - return; + Err(()) } }; - p.end(NodeKind::Call); + callee.end(p, NodeKind::Call); + res } /// Parse the arguments to a function call. -fn args(p: &mut Parser, allow_template: bool) { +fn args(p: &mut Parser, allow_template: bool) -> ParseResult { p.start(); if !allow_template || p.peek_direct() == Some(&NodeKind::LeftParen) { p.start_group(Group::Paren, TokenMode::Code); @@ -625,167 +603,126 @@ fn args(p: &mut Parser, allow_template: bool) { } p.end(NodeKind::CallArgs); + Ok(()) } /// Parse a with expression. -fn with_expr(p: &mut Parser, preserve: usize) { - p.start_with(preserve); +fn with_expr(p: &mut Parser, marker: &Marker) -> ParseResult { p.eat_assert(&NodeKind::With); - if p.peek() == Some(&NodeKind::LeftParen) { - args(p, false); - p.end(NodeKind::WithExpr); + let res = if p.peek() == Some(&NodeKind::LeftParen) { + args(p, false) } else { p.expected("argument list"); - p.may_end_abort(NodeKind::WithExpr); - } + Err(()) + }; + + marker.end(p, NodeKind::WithExpr); + res } /// Parse a let expression. -fn let_expr(p: &mut Parser) { - p.start(); - p.eat_assert(&NodeKind::Let); +fn let_expr(p: &mut Parser) -> ParseResult { + p.perform(NodeKind::LetExpr, |p| { + p.eat_assert(&NodeKind::Let); - let offset = p.child_count(); - ident(p); - if p.may_end_abort(NodeKind::LetExpr) { - return; - } + let marker = p.marker(); + ident(p)?; - if p.peek() == Some(&NodeKind::With) { - with_expr(p, p.child_count() - offset); - } else { - // If a parenthesis follows, this is a function definition. 
- let has_params = if p.peek_direct() == Some(&NodeKind::LeftParen) { - p.start(); - p.start_group(Group::Paren, TokenMode::Code); - let offset = p.child_count(); - collection(p); - params(p, offset, true); - p.end_group(); - p.end(NodeKind::ClosureParams); - true + if p.peek() == Some(&NodeKind::With) { + with_expr(p, &marker); } else { - false - }; + // If a parenthesis follows, this is a function definition. + let has_params = if p.peek_direct() == Some(&NodeKind::LeftParen) { + p.start(); + p.start_group(Group::Paren, TokenMode::Code); + let marker = p.marker(); + collection(p); + params(p, &marker, true); + p.end_group(); + p.end(NodeKind::ClosureParams); + true + } else { + false + }; - if p.eat_if(&NodeKind::Eq) { - expr(p); - } else if has_params { - // Function definitions must have a body. - p.expected_at("body"); - } - - // Rewrite into a closure expression if it's a function definition. - if has_params { - if p.may_end_abort(NodeKind::LetExpr) { - return; + if p.eat_if(&NodeKind::Eq) { + expr(p)?; + } else if has_params { + // Function definitions must have a body. + p.expected_at("body"); } - p.convert_with(p.child_count() - offset, NodeKind::Closure); + // Rewrite into a closure expression if it's a function definition. + if has_params { + marker.end(p, NodeKind::Closure); + } } - } - p.end(NodeKind::LetExpr); + Ok(()) + }) } /// Parse an if expresion. -fn if_expr(p: &mut Parser) { - p.start(); - p.eat_assert(&NodeKind::If); +fn if_expr(p: &mut Parser) -> ParseResult { + p.perform(NodeKind::IfExpr, |p| { + p.eat_assert(&NodeKind::If); - expr(p); - if p.may_end_abort(NodeKind::IfExpr) { - return; - } + expr(p)?; + body(p)?; - body(p); - if p.may_end_abort(NodeKind::IfExpr) { - // Expected function body. 
- return; - } - - if p.eat_if(&NodeKind::Else) { - if p.peek() == Some(&NodeKind::If) { - if_expr(p); - } else { - body(p); + if p.eat_if(&NodeKind::Else) { + if p.peek() == Some(&NodeKind::If) { + if_expr(p)?; + } else { + body(p)?; + } } - } - p.end(NodeKind::IfExpr); + Ok(()) + }) } /// Parse a while expresion. -fn while_expr(p: &mut Parser) { - p.start(); - p.eat_assert(&NodeKind::While); - - expr(p); - - if p.may_end_abort(NodeKind::WhileExpr) { - return; - } - - body(p); - if !p.may_end_abort(NodeKind::WhileExpr) { - p.end(NodeKind::WhileExpr); - } +fn while_expr(p: &mut Parser) -> ParseResult { + p.perform(NodeKind::WhileExpr, |p| { + p.eat_assert(&NodeKind::While); + expr(p)?; + body(p)?; + Ok(()) + }) } /// Parse a for expression. -fn for_expr(p: &mut Parser) { - p.start(); - p.eat_assert(&NodeKind::For); +fn for_expr(p: &mut Parser) -> ParseResult { + p.perform(NodeKind::ForExpr, |p| { + p.eat_assert(&NodeKind::For); - for_pattern(p); - - if p.may_end_abort(NodeKind::ForExpr) { - return; - } - - if p.eat_expect(&NodeKind::In) { - expr(p); - - if p.may_end_abort(NodeKind::ForExpr) { - return; + for_pattern(p)?; + if p.eat_expect(&NodeKind::In) { + expr(p)?; + body(p)?; + Ok(()) + } else { + Err(()) } - - body(p); - - if !p.may_end_abort(NodeKind::ForExpr) { - p.end(NodeKind::ForExpr); - } - } else { - p.unsuccessful(); - p.may_end_abort(NodeKind::ForExpr); - } + }) } /// Parse a for loop pattern. -fn for_pattern(p: &mut Parser) { - p.start(); - ident(p); - - if p.may_end_abort(NodeKind::ForPattern) { - return; - } - - if p.peek() == Some(&NodeKind::Comma) { - p.eat(); - - ident(p); - - if p.may_end_abort(NodeKind::ForPattern) { - return; +fn for_pattern(p: &mut Parser) -> ParseResult { + p.perform(NodeKind::ForPattern, |p| { + ident(p)?; + if p.peek() == Some(&NodeKind::Comma) { + p.eat(); + ident(p)?; } - } - - p.end(NodeKind::ForPattern); + Ok(()) + }) } /// Parse an import expression. 
-fn import_expr(p: &mut Parser) { +fn import_expr(p: &mut Parser) -> ParseResult { p.start(); p.eat_assert(&NodeKind::Import); @@ -793,15 +730,15 @@ fn import_expr(p: &mut Parser) { // This is the list of identifiers scenario. p.start(); p.start_group(Group::Imports, TokenMode::Code); - let offset = p.child_count(); + let marker = p.marker(); let items = collection(p).1; if items == 0 { p.expected_at("import items"); } p.end_group(); - p.filter_children( - offset, + marker.filter_children( + p, |n| matches!(n.kind(), NodeKind::Ident(_) | NodeKind::Comma), |_| (ErrorPosition::Full, "expected identifier".into()), ); @@ -813,36 +750,41 @@ fn import_expr(p: &mut Parser) { } p.end(NodeKind::ImportExpr); + Ok(()) } /// Parse an include expression. -fn include_expr(p: &mut Parser) { +fn include_expr(p: &mut Parser) -> ParseResult { p.start(); p.eat_assert(&NodeKind::Include); expr(p); p.end(NodeKind::IncludeExpr); + Ok(()) } /// Parse an identifier. -fn ident(p: &mut Parser) { +fn ident(p: &mut Parser) -> ParseResult { match p.peek() { - Some(NodeKind::Ident(_)) => p.eat(), + Some(NodeKind::Ident(_)) => { + p.eat(); + Ok(()) + } _ => { p.expected("identifier"); - p.unsuccessful(); + Err(()) } } } /// Parse a control flow body. -fn body(p: &mut Parser) { +fn body(p: &mut Parser) -> ParseResult { match p.peek() { Some(NodeKind::LeftBracket) => template(p), Some(NodeKind::LeftBrace) => block(p), _ => { p.expected_at("body"); - p.unsuccessful(); + Err(()) } } } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 5ecb6e9dc..bc028876c 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,7 +1,7 @@ use std::ops::Range; use std::rc::Rc; -use super::{TokenMode, Tokens}; +use super::{ParseResult, TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; @@ -26,8 +26,6 @@ pub struct Parser<'s> { stack: Vec>, /// The children of the currently built node. 
children: Vec, - /// Whether the last parsing step was successful. - success: bool, } /// A logical group of tokens, e.g. `[...]`. @@ -58,6 +56,49 @@ pub enum Group { Imports, } +/// A marker that indicates where a child may start. +pub struct Marker(usize); + +impl Marker { + /// Wraps all children in front of the marker. + pub fn end(&self, p: &mut Parser, kind: NodeKind) { + if p.children.len() != self.0 { + let stop_nl = p.stop_at_newline(); + let end = (self.0 .. p.children.len()) + .rev() + .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) + .unwrap_or(self.0) + + 1; + + let children: Vec<_> = p.children.drain(self.0 .. end).collect(); + let len = children.iter().map(Green::len).sum(); + p.children + .insert(self.0, GreenNode::with_children(kind, len, children).into()); + } + } + + /// Wrap all children that do not fulfill the predicate in error nodes. + pub fn filter_children(&self, p: &mut Parser, f: F, error: G) + where + F: Fn(&Green) -> bool, + G: Fn(&NodeKind) -> (ErrorPosition, EcoString), + { + p.filter_children(self, f, error) + } + + /// Insert an error message that `what` was expected at the marker position. + pub fn expected_at(&self, p: &mut Parser, what: &str) { + p.children.insert( + self.0, + GreenData::new( + NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), + 0, + ) + .into(), + ); + } +} + impl<'s> Parser<'s> { /// Create a new parser for the source string. pub fn new(src: &'s str) -> Self { @@ -73,7 +114,6 @@ impl<'s> Parser<'s> { next_start: 0, stack: vec![], children: vec![], - success: true, } } @@ -85,19 +125,13 @@ impl<'s> Parser<'s> { self.stack.push(std::mem::take(&mut self.children)); } - /// Start a nested node, preserving a number of the current children. 
- pub fn start_with(&mut self, preserve: usize) { - let preserved = self.children.drain(self.children.len() - preserve ..).collect(); - self.stack.push(std::mem::replace(&mut self.children, preserved)); - } - /// Filter the last children using the given predicate. - pub fn filter_children(&mut self, count: usize, f: F, error: G) + fn filter_children(&mut self, count: &Marker, f: F, error: G) where F: Fn(&Green) -> bool, G: Fn(&NodeKind) -> (ErrorPosition, EcoString), { - for child in &mut self.children[count ..] { + for child in &mut self.children[count.0 ..] { if !((self.tokens.mode() != TokenMode::Code || Self::skip_type_ext(child.kind(), false)) || child.kind().is_error() @@ -161,46 +195,22 @@ impl<'s> Parser<'s> { self.children .push(GreenNode::with_children(kind, len, children).into()); self.children.extend(remains); - self.success = true; } - /// End the current node as a node of given `kind`, and start a new node - /// with the ended node as a first child. The function returns how many - /// children the stack frame had before and how many were appended (accounts - /// for trivia). - pub fn end_and_start_with(&mut self, kind: NodeKind) -> (usize, usize) { - let stack_offset = self.stack.last().unwrap().len(); + pub fn perform(&mut self, kind: NodeKind, f: F) -> ParseResult + where + F: FnOnce(&mut Self) -> ParseResult, + { + self.start(); + let success = f(self); self.end(kind); - let diff = self.children.len() - stack_offset; - self.start_with(diff); - (stack_offset, diff) - } - - /// Wrap a specified node in the current stack frame (indexed from the back, - /// not including trivia). - pub fn wrap(&mut self, index: usize, kind: NodeKind) { - let index = self.node_index_from_back(index).unwrap(); - let child = std::mem::take(&mut self.children[index]); - let item = GreenNode::with_child(kind, child.len(), child); - self.children[index] = item.into(); + success } /// Eat and wrap the next token. 
pub fn convert(&mut self, kind: NodeKind) { self.eat(); self.children.last_mut().unwrap().set_kind(kind); - self.success = true; - } - - /// Wrap the last `amount` children in the current stack frame with a new - /// node. - pub fn convert_with(&mut self, amount: usize, kind: NodeKind) { - let preserved: Vec<_> = - self.children.drain(self.children.len() - amount ..).collect(); - let len = preserved.iter().map(|c| c.len()).sum(); - self.children - .push(GreenNode::with_children(kind, len, preserved).into()); - self.success = true; } /// End the current node and undo its existence, inling all accumulated @@ -209,50 +219,14 @@ impl<'s> Parser<'s> { let outer = self.stack.pop().unwrap(); let children = std::mem::replace(&mut self.children, outer); self.children.extend(children); - self.success = true; } - /// End the current node and undo its existence, deleting all accumulated - /// children. - pub fn abort(&mut self, msg: impl Into) { - self.end(NodeKind::Error(ErrorPosition::Full, msg.into().into())); - self.success = false; - } - - /// This function [`Self::lift`]s if the last operation was unsuccessful and - /// returns whether it did. - pub fn may_lift_abort(&mut self) -> bool { - if !self.success { - self.lift(); - self.success = false; - true - } else { - false - } - } - - /// This function [`Self::end`]s if the last operation was unsuccessful and - /// returns whether it did. - pub fn may_end_abort(&mut self, kind: NodeKind) -> bool { - if !self.success { - self.end(kind); - self.success = false; - true - } else { - false - } - } - - /// End the current node as a node of given `kind` if the last parse was - /// successful, otherwise, abort. - pub fn end_or_abort(&mut self, kind: NodeKind) -> bool { - if self.success { - self.end(kind); - true - } else { - self.may_end_abort(kind); - false - } + /// Add an error to the current children list. 
+ fn push_error(&mut self, msg: impl Into) { + self.children.push( + GreenData::new(NodeKind::Error(ErrorPosition::Full, msg.into().into()), 0) + .into(), + ); } /// End the parsing process and return the last child. @@ -268,14 +242,6 @@ impl<'s> Parser<'s> { self.peek().is_none() } - /// Consume the next token and return its kind. - // NOTE: This isn't great. - fn eat_peeked(&mut self) -> Option { - let token = self.peek()?.clone(); - self.eat(); - Some(token) - } - /// Consume the next token if it is the given one. pub fn eat_if(&mut self, t: &NodeKind) -> bool { if self.peek() == Some(t) { @@ -311,9 +277,9 @@ impl<'s> Parser<'s> { /// Consume the next token, debug-asserting that it is one of the given ones. pub fn eat_assert(&mut self, t: &NodeKind) { - // NOTE: assert with peek(), then eat() - let next = self.eat_peeked(); - debug_assert_eq!(next.as_ref(), Some(t)); + let next = self.peek(); + debug_assert_eq!(next, Some(t)); + self.eat(); } /// Consume tokens while the condition is true. @@ -402,9 +368,10 @@ impl<'s> Parser<'s> { /// End the parsing of a group. /// /// This panics if no group was started. - pub fn end_group(&mut self) { + pub fn end_group(&mut self) -> ParseResult { let prev_mode = self.tokens.mode(); let group = self.groups.pop().expect("no started group"); + let mut success = true; self.tokens.set_mode(group.prev_mode); self.repeek(); @@ -424,8 +391,8 @@ impl<'s> Parser<'s> { self.eat(); rescan = false; } else if required { - self.start(); - self.abort(format!("expected {}", end)); + self.push_error(format!("expected {}", end)); + success = false; } } @@ -448,6 +415,8 @@ impl<'s> Parser<'s> { self.next = self.tokens.next(); self.repeek(); } + + if success { Ok(()) } else { Err(()) } } /// Add an error that `what` was expected at the given span. @@ -460,39 +429,36 @@ impl<'s> Parser<'s> { found = i; } - self.expected_at_child(found, what); - } - - /// Add an error that `what` was expected at the given child index. 
- pub fn expected_at_child(&mut self, index: usize, what: &str) { - self.children.insert( - index, - GreenData::new( - NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), - 0, - ) - .into(), - ); + Marker(found).expected_at(self, what); } /// Eat the next token and add an error that it is not the expected `thing`. pub fn expected(&mut self, what: &str) { - self.start(); - match self.eat_peeked() { - Some(found) => self.abort(format!("expected {}, found {}", what, found)), - None => { - self.lift(); - self.expected_at(what); + match self.peek().cloned() { + Some(found) => { + self.start(); + self.eat(); + self.end(NodeKind::Error( + ErrorPosition::Full, + format!("expected {}, found {}", what, found).into(), + )); } + None => self.expected_at(what), } } /// Eat the next token and add an error that it is unexpected. pub fn unexpected(&mut self) { - self.start(); - match self.eat_peeked() { - Some(found) => self.abort(format!("unexpected {}", found)), - None => self.abort("unexpected end of file"), + match self.peek().cloned() { + Some(found) => { + self.start(); + self.eat(); + self.end(NodeKind::Error( + ErrorPosition::Full, + format!("unexpected {}", found).into(), + )); + } + None => self.push_error("unexpected end of file"), } } @@ -584,20 +550,8 @@ impl<'s> Parser<'s> { self.children.last() } - /// Whether the last operation was successful. - pub fn success(&mut self) -> bool { - let s = self.success; - self.success = true; - s - } - - /// Declare the last operation as unsuccessful. - pub fn unsuccessful(&mut self) { - self.success = false; - } - - /// Amount of children in the current stack frame. - pub fn child_count(&self) -> usize { - self.children.len() + /// Create a new marker. 
+ pub fn marker(&mut self) -> Marker { + Marker(self.children.len()) } } diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 9ad04be58..b6f64c677 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -156,13 +156,7 @@ impl HeadingNode { /// The section depth (numer of equals signs). pub fn level(&self) -> u8 { - self.0 - .children() - .find_map(|node| match node.kind() { - NodeKind::HeadingLevel(heading) => Some(*heading), - _ => None, - }) - .expect("heading node is missing heading level") + self.0.children().filter(|n| n.kind() == &NodeKind::Eq).count() as u8 } } @@ -743,7 +737,7 @@ impl TypedNode for CallArg { NodeKind::Named => Some(CallArg::Named( node.cast().expect("named call argument is missing name"), )), - NodeKind::ParameterSink => Some(CallArg::Spread( + NodeKind::Spread => Some(CallArg::Spread( node.cast_first_child() .expect("call argument sink is missing expression"), )), @@ -825,7 +819,7 @@ impl TypedNode for ClosureParam { NodeKind::Named => Some(ClosureParam::Named( node.cast().expect("named closure parameter is missing name"), )), - NodeKind::ParameterSink => Some(ClosureParam::Sink( + NodeKind::Spread => Some(ClosureParam::Sink( node.cast_first_child() .expect("closure parameter sink is missing identifier"), )), diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 112fc220f..db3b0c9ab 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -487,8 +487,6 @@ pub enum NodeKind { Emph, /// A section heading: `= Introduction`. Heading, - /// A heading's level: `=`, `==`, `===`, etc. - HeadingLevel(u8), /// An item in an enumeration (ordered list): `1. ...`. Enum, /// A numbering: `23.`. @@ -546,7 +544,7 @@ pub enum NodeKind { /// A closure's parameters: `(x, y)`. ClosureParams, /// A parameter sink: `..x`. - ParameterSink, + Spread, /// A template expression: `[*Hi* there!]`. Template, /// A block expression: `{ let x = 1; x + 2 }`. 
@@ -709,7 +707,6 @@ impl NodeKind { Self::Strong => "strong", Self::Emph => "emphasis", Self::Heading => "heading", - Self::HeadingLevel(_) => "heading level", Self::Enum => "enumeration item", Self::EnumNumbering(_) => "enumeration item numbering", Self::List => "list item", @@ -735,7 +732,7 @@ impl NodeKind { Self::CallArgs => "call arguments", Self::Closure => "closure", Self::ClosureParams => "closure parameters", - Self::ParameterSink => "parameter sink", + Self::Spread => "parameter sink", Self::Template => "template", Self::Block => "block", Self::ForExpr => "for-loop expression", diff --git a/tests/typ/code/let.typ b/tests/typ/code/let.typ index 3f3f9d357..cd7531b76 100644 --- a/tests/typ/code/let.typ +++ b/tests/typ/code/let.typ @@ -56,7 +56,7 @@ Three #let v4 = 4 Four // Terminated by semicolon even though we are in a paren group. -// Error: 19 expected expression +// Error: 18 expected expression // Error: 19 expected closing paren #let v5 = (1, 2 + ; Five From cf2e527a026e81269ef716b4d6675ae6d981d681 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Fri, 5 Nov 2021 12:53:52 +0100 Subject: [PATCH 12/18] Code Review: No Patrick, question marks are not an instrument --- src/eval/walk.rs | 2 +- src/parse/mod.rs | 417 ++++++++++++++++------------------- src/parse/parser.rs | 133 +++++------ src/syntax/ast.rs | 6 +- src/syntax/mod.rs | 15 +- tests/ref/markup/heading.png | Bin 6611 -> 6406 bytes tests/typ/markup/heading.typ | 4 +- 7 files changed, 260 insertions(+), 317 deletions(-) diff --git a/src/eval/walk.rs b/src/eval/walk.rs index ff73f9f90..1656929b8 100644 --- a/src/eval/walk.rs +++ b/src/eval/walk.rs @@ -76,7 +76,7 @@ impl Walk for HeadingNode { ctx.template.save(); ctx.template.modify(move |style| { let text = style.text_mut(); - let upscale = 1.6 - 0.1 * level as f64; + let upscale = (1.6 - 0.1 * level as f64).max(0.75); text.size *= upscale; text.strong = true; }); diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 92220eaab..21ca303ed 100644 
--- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -14,9 +14,8 @@ use std::rc::Rc; use crate::syntax::ast::{Associativity, BinOp, UnOp}; use crate::syntax::{ErrorPosition, GreenNode, NodeKind}; -use crate::util::EcoString; -type ParseResult = Result<(), ()>; +type ParseResult = Result; /// Parse a source file. pub fn parse(source: &str) -> Rc { @@ -52,12 +51,11 @@ fn markup_while(p: &mut Parser, mut at_start: bool, f: &mut F) where F: FnMut(&mut Parser) -> bool, { - p.start(); - while !p.eof() && f(p) { - markup_node(p, &mut at_start); - } - - p.end(NodeKind::Markup); + p.perform(NodeKind::Markup, |p| { + while !p.eof() && f(p) { + markup_node(p, &mut at_start).ok(); + } + }); } /// Parse a markup node. @@ -91,7 +89,6 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) -> ParseResult { | NodeKind::Raw(_) | NodeKind::UnicodeEscape(_) => { p.eat(); - Ok(()) } NodeKind::Eq if *at_start => heading(p), @@ -101,7 +98,6 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) -> ParseResult { // Line-based markup that is not currently at the start of the line. NodeKind::Eq | NodeKind::ListBullet | NodeKind::EnumNumbering(_) => { p.convert(NodeKind::Text(p.peek_src().into())); - Ok(()) } // Hashtag + keyword / identifier. @@ -120,7 +116,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) -> ParseResult { if stmt && res.is_ok() && !p.eof() { p.expected_at("semicolon or line break"); } - p.end_group() + p.end_group(); } // Block and template. @@ -135,58 +131,46 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) -> ParseResult { NodeKind::Error(_, _) => { p.eat(); - Ok(()) } _ => { p.unexpected(); - Err(()) + return Err(()); } - }?; + }; *at_start = false; Ok(()) } /// Parse a heading. -fn heading(p: &mut Parser) -> ParseResult { - p.start(); - p.eat_assert(&NodeKind::Eq); +fn heading(p: &mut Parser) { + p.perform(NodeKind::Heading, |p| { + p.eat_assert(&NodeKind::Eq); - // Count depth. 
- let mut level: usize = 1; - while p.eat_if(&NodeKind::Eq) { - level += 1; - } + while p.eat_if(&NodeKind::Eq) {} - if level > 6 { - p.end(NodeKind::Text(EcoString::from('=').repeat(level))); - } else { let column = p.column(p.prev_end()); markup_indented(p, column); - p.end(NodeKind::Heading); - } - Ok(()) + }); } /// Parse a single list item. -fn list_node(p: &mut Parser) -> ParseResult { - p.start(); - p.eat_assert(&NodeKind::ListBullet); - let column = p.column(p.prev_end()); - markup_indented(p, column); - p.end(NodeKind::List); - Ok(()) +fn list_node(p: &mut Parser) { + p.perform(NodeKind::List, |p| { + p.eat_assert(&NodeKind::ListBullet); + let column = p.column(p.prev_end()); + markup_indented(p, column); + }); } /// Parse a single enum item. -fn enum_node(p: &mut Parser) -> ParseResult { - p.start(); - p.eat(); - let column = p.column(p.prev_end()); - markup_indented(p, column); - p.end(NodeKind::Enum); - Ok(()) +fn enum_node(p: &mut Parser) { + p.perform(NodeKind::Enum, |p| { + p.eat(); + let column = p.column(p.prev_end()); + markup_indented(p, column); + }); } /// Parse an expression. @@ -224,7 +208,7 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { p.peek_direct(), Some(NodeKind::LeftParen | NodeKind::LeftBracket) ) { - call(p, &marker); + call(p, &marker)?; continue; } @@ -255,19 +239,14 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { Associativity::Right => {} } - if expr_with(p, atomic, prec).is_err() { - break Ok(()); - } - - marker.end(p, NodeKind::Binary); + marker.perform(p, NodeKind::Binary, |p| expr_with(p, atomic, prec))?; } } /// Parse a primary expression. 
fn primary(p: &mut Parser, atomic: bool) -> ParseResult { - let lit = literal(p); - if lit.is_ok() { - return lit; + if literal(p) { + return Ok(()); } match p.peek() { @@ -282,9 +261,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { marker.end(p, NodeKind::ClosureParams); p.eat(); - let e = expr(p); - marker.end(p, NodeKind::Closure); - e + marker.perform(p, NodeKind::Closure, expr) } else { Ok(()) } @@ -292,8 +269,14 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { // Structures. Some(NodeKind::LeftParen) => parenthesized(p), - Some(NodeKind::LeftBracket) => template(p), - Some(NodeKind::LeftBrace) => block(p), + Some(NodeKind::LeftBracket) => { + template(p); + Ok(()) + } + Some(NodeKind::LeftBrace) => { + block(p); + Ok(()) + } // Keywords. Some(NodeKind::Let) => let_expr(p), @@ -317,7 +300,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { } /// Parse a literal. -fn literal(p: &mut Parser) -> ParseResult { +fn literal(p: &mut Parser) -> bool { match p.peek() { // Basic values. Some( @@ -333,10 +316,10 @@ fn literal(p: &mut Parser) -> ParseResult { | NodeKind::Str(_), ) => { p.eat(); - Ok(()) + true } - _ => Err(()), + _ => false, } } @@ -364,10 +347,7 @@ fn parenthesized(p: &mut Parser) -> ParseResult { p.eat_assert(&NodeKind::Arrow); - let r = expr(p); - - marker.end(p, NodeKind::Closure); - return r; + return marker.perform(p, NodeKind::Closure, expr); } // Find out which kind of collection this is. @@ -439,37 +419,35 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { } /// Parse an expression or a named pair. Returns if this is a named pair. 
-fn item(p: &mut Parser) -> Result { +fn item(p: &mut Parser) -> ParseResult { let marker = p.marker(); if p.eat_if(&NodeKind::Dots) { - let r = expr(p); - - marker.end(p, NodeKind::Spread); - return r.map(|_| NodeKind::Spread); + return marker + .perform(p, NodeKind::Spread, |p| expr(p).map(|_| NodeKind::Spread)); } let ident_marker = p.marker(); - if expr(p).is_err() { - return Err(()); - } + expr(p)?; if p.peek() == Some(&NodeKind::Colon) { - let r = if matches!(p.child(0).unwrap().kind(), &NodeKind::Ident(_)) { - p.eat(); - expr(p) - } else { - ident_marker.end( - p, - NodeKind::Error(ErrorPosition::Full, "expected identifier".into()), - ); - p.eat(); + marker.perform(p, NodeKind::Named, |p| { + if matches!( + ident_marker.child_at(p).unwrap().kind(), + &NodeKind::Ident(_) + ) { + p.eat(); + expr(p).map(|_| NodeKind::Named) + } else { + ident_marker.end( + p, + NodeKind::Error(ErrorPosition::Full, "expected identifier".into()), + ); + p.eat(); - expr(p); - Err(()) - }; - - marker.end(p, NodeKind::Named); - r.map(|_| NodeKind::Named) + expr(p).ok(); + Err(()) + } + }) } else { Ok(p.last_child().unwrap().kind().clone()) } @@ -478,23 +456,16 @@ fn item(p: &mut Parser) -> Result { /// Convert a collection into an array, producing errors for anything other than /// expressions. 
fn array(p: &mut Parser, marker: &Marker) -> ParseResult { - marker.filter_children( - p, - |x| match x.kind() { - NodeKind::Named | NodeKind::Spread => false, - _ => true, - }, - |kind| match kind { - NodeKind::Named => ( - ErrorPosition::Full, - "expected expression, found named pair".into(), - ), - NodeKind::Spread => { - (ErrorPosition::Full, "spreading is not allowed here".into()) - } - _ => unreachable!(), - }, - ); + marker.filter_children(p, |x| match x.kind() { + NodeKind::Named => Err(( + ErrorPosition::Full, + "expected expression, found named pair".into(), + )), + NodeKind::Spread => { + Err((ErrorPosition::Full, "spreading is not allowed here".into())) + } + _ => Ok(()), + }); marker.end(p, NodeKind::Array); Ok(()) @@ -503,24 +474,17 @@ fn array(p: &mut Parser, marker: &Marker) -> ParseResult { /// Convert a collection into a dictionary, producing errors for anything other /// than named pairs. fn dict(p: &mut Parser, marker: &Marker) -> ParseResult { - marker.filter_children( - p, - |x| { - x.kind() == &NodeKind::Named - || x.kind().is_paren() - || x.kind() == &NodeKind::Comma - || x.kind() == &NodeKind::Colon - }, - |kind| match kind { - NodeKind::Spread => { - (ErrorPosition::Full, "spreading is not allowed here".into()) - } - _ => ( - ErrorPosition::Full, - "expected named pair, found expression".into(), - ), - }, - ); + marker.filter_children(p, |x| match x.kind() { + NodeKind::Named | NodeKind::Comma | NodeKind::Colon => Ok(()), + NodeKind::Spread => { + Err((ErrorPosition::Full, "spreading is not allowed here".into())) + } + _ if x.kind().is_paren() => Ok(()), + _ => Err(( + ErrorPosition::Full, + "expected named pair, found expression".into(), + )), + }); marker.end(p, NodeKind::Dict); Ok(()) @@ -529,96 +493,90 @@ fn dict(p: &mut Parser, marker: &Marker) -> ParseResult { /// Convert a collection into a list of parameters, producing errors for /// anything other than identifiers, spread operations and named pairs. 
fn params(p: &mut Parser, marker: &Marker, allow_parens: bool) { - marker.filter_children( - p, - |x| match x.kind() { - NodeKind::Named | NodeKind::Comma | NodeKind::Ident(_) => true, - NodeKind::Spread => matches!( - x.children().last().map(|x| x.kind()), - Some(&NodeKind::Ident(_)) - ), - _ => false, - } - || (allow_parens && x.kind().is_paren()), - |_| (ErrorPosition::Full, "expected identifier".into()), - ); + marker.filter_children(p, |x| match x.kind() { + NodeKind::Named | NodeKind::Comma | NodeKind::Ident(_) => Ok(()), + NodeKind::Spread + if matches!( + x.children().last().map(|x| x.kind()), + Some(&NodeKind::Ident(_)) + ) => + { + Ok(()) + } + _ if allow_parens && x.kind().is_paren() => Ok(()), + _ => Err((ErrorPosition::Full, "expected identifier".into())), + }); } // Parse a template block: `[...]`. -fn template(p: &mut Parser) -> ParseResult { - p.start(); - p.start_group(Group::Bracket, TokenMode::Markup); - markup(p); - p.end_group(); - p.end(NodeKind::Template); - Ok(()) +fn template(p: &mut Parser) { + p.perform(NodeKind::Template, |p| { + p.start_group(Group::Bracket, TokenMode::Markup); + markup(p); + p.end_group(); + }); } /// Parse a code block: `{...}`. -fn block(p: &mut Parser) -> ParseResult { - p.start(); - p.start_group(Group::Brace, TokenMode::Code); - while !p.eof() { - p.start_group(Group::Stmt, TokenMode::Code); - if expr(p).is_ok() { - if !p.eof() { +fn block(p: &mut Parser) { + p.perform(NodeKind::Block, |p| { + p.start_group(Group::Brace, TokenMode::Code); + while !p.eof() { + p.start_group(Group::Stmt, TokenMode::Code); + if expr(p).is_ok() && !p.eof() { p.expected_at("semicolon or line break"); } + p.end_group(); + + // Forcefully skip over newlines since the group's contents can't. + p.eat_while(|t| matches!(t, NodeKind::Space(_))); } p.end_group(); - - // Forcefully skip over newlines since the group's contents can't. 
- p.eat_while(|t| matches!(t, NodeKind::Space(_))); - } - p.end_group(); - p.end(NodeKind::Block); - Ok(()) + }); } /// Parse a function call. fn call(p: &mut Parser, callee: &Marker) -> ParseResult { - let res = match p.peek_direct() { - Some(NodeKind::LeftParen) | Some(NodeKind::LeftBracket) => args(p, true), + callee.perform(p, NodeKind::Call, |p| match p.peek_direct() { + Some(NodeKind::LeftParen) | Some(NodeKind::LeftBracket) => { + args(p, true); + Ok(()) + } _ => { p.expected_at("argument list"); Err(()) } - }; - - callee.end(p, NodeKind::Call); - res + }) } /// Parse the arguments to a function call. -fn args(p: &mut Parser, allow_template: bool) -> ParseResult { - p.start(); - if !allow_template || p.peek_direct() == Some(&NodeKind::LeftParen) { - p.start_group(Group::Paren, TokenMode::Code); - collection(p); - p.end_group(); - } +fn args(p: &mut Parser, allow_template: bool) { + p.perform(NodeKind::CallArgs, |p| { + if !allow_template || p.peek_direct() == Some(&NodeKind::LeftParen) { + p.start_group(Group::Paren, TokenMode::Code); + collection(p); + p.end_group(); + } - while allow_template && p.peek_direct() == Some(&NodeKind::LeftBracket) { - template(p); - } - - p.end(NodeKind::CallArgs); - Ok(()) + while allow_template && p.peek_direct() == Some(&NodeKind::LeftBracket) { + template(p); + } + }) } /// Parse a with expression. fn with_expr(p: &mut Parser, marker: &Marker) -> ParseResult { - p.eat_assert(&NodeKind::With); + marker.perform(p, NodeKind::WithExpr, |p| { + p.eat_assert(&NodeKind::With); - let res = if p.peek() == Some(&NodeKind::LeftParen) { - args(p, false) - } else { - p.expected("argument list"); - Err(()) - }; - - marker.end(p, NodeKind::WithExpr); - res + if p.peek() == Some(&NodeKind::LeftParen) { + args(p, false); + Ok(()) + } else { + p.expected("argument list"); + Err(()) + } + }) } /// Parse a let expression. 
@@ -630,17 +588,17 @@ fn let_expr(p: &mut Parser) -> ParseResult { ident(p)?; if p.peek() == Some(&NodeKind::With) { - with_expr(p, &marker); + with_expr(p, &marker)?; } else { // If a parenthesis follows, this is a function definition. let has_params = if p.peek_direct() == Some(&NodeKind::LeftParen) { - p.start(); - p.start_group(Group::Paren, TokenMode::Code); - let marker = p.marker(); - collection(p); - params(p, &marker, true); - p.end_group(); - p.end(NodeKind::ClosureParams); + p.perform(NodeKind::ClosureParams, |p| { + p.start_group(Group::Paren, TokenMode::Code); + let marker = p.marker(); + collection(p); + params(p, &marker, true); + p.end_group(); + }); true } else { false @@ -699,13 +657,10 @@ fn for_expr(p: &mut Parser) -> ParseResult { p.eat_assert(&NodeKind::For); for_pattern(p)?; - if p.eat_expect(&NodeKind::In) { - expr(p)?; - body(p)?; - Ok(()) - } else { - Err(()) - } + p.eat_expect(&NodeKind::In)?; + expr(p)?; + body(p)?; + Ok(()) }) } @@ -723,44 +678,42 @@ fn for_pattern(p: &mut Parser) -> ParseResult { /// Parse an import expression. fn import_expr(p: &mut Parser) -> ParseResult { - p.start(); - p.eat_assert(&NodeKind::Import); + p.perform(NodeKind::ImportExpr, |p| { + p.eat_assert(&NodeKind::Import); - if !p.eat_if(&NodeKind::Star) { - // This is the list of identifiers scenario. - p.start(); - p.start_group(Group::Imports, TokenMode::Code); - let marker = p.marker(); - let items = collection(p).1; - if items == 0 { - p.expected_at("import items"); + if !p.eat_if(&NodeKind::Star) { + // This is the list of identifiers scenario. 
+ p.perform(NodeKind::ImportItems, |p| { + p.start_group(Group::Imports, TokenMode::Code); + let marker = p.marker(); + let items = collection(p).1; + if items == 0 { + p.expected_at("import items"); + } + p.end_group(); + + marker.filter_children(p, |n| match n.kind() { + NodeKind::Ident(_) | NodeKind::Comma => Ok(()), + _ => Err((ErrorPosition::Full, "expected identifier".into())), + }); + }); + }; + + if p.eat_expect(&NodeKind::From).is_ok() { + expr(p)?; } - p.end_group(); - marker.filter_children( - p, - |n| matches!(n.kind(), NodeKind::Ident(_) | NodeKind::Comma), - |_| (ErrorPosition::Full, "expected identifier".into()), - ); - p.end(NodeKind::ImportItems); - }; - - if p.eat_expect(&NodeKind::From) { - expr(p); - } - - p.end(NodeKind::ImportExpr); - Ok(()) + Ok(()) + }) } /// Parse an include expression. fn include_expr(p: &mut Parser) -> ParseResult { - p.start(); - p.eat_assert(&NodeKind::Include); - - expr(p); - p.end(NodeKind::IncludeExpr); - Ok(()) + p.perform(NodeKind::IncludeExpr, |p| { + p.eat_assert(&NodeKind::Include); + expr(p)?; + Ok(()) + }) } /// Parse an identifier. @@ -784,7 +737,9 @@ fn body(p: &mut Parser) -> ParseResult { Some(NodeKind::LeftBrace) => block(p), _ => { p.expected_at("body"); - Err(()) + return Err(()); } } + + Ok(()) } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index bc028876c..3813ee840 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -62,28 +62,24 @@ pub struct Marker(usize); impl Marker { /// Wraps all children in front of the marker. pub fn end(&self, p: &mut Parser, kind: NodeKind) { - if p.children.len() != self.0 { - let stop_nl = p.stop_at_newline(); - let end = (self.0 .. p.children.len()) - .rev() - .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) - .unwrap_or(self.0) - + 1; + let stop_nl = p.stop_at_newline(); + let end = (self.0 .. 
p.children.len()) + .rev() + .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) + .unwrap_or(self.0) + + 1; - let children: Vec<_> = p.children.drain(self.0 .. end).collect(); - let len = children.iter().map(Green::len).sum(); - p.children - .insert(self.0, GreenNode::with_children(kind, len, children).into()); - } + let children: Vec<_> = p.children.drain(self.0 .. end).collect(); + p.children + .insert(self.0, GreenNode::with_children(kind, children).into()); } /// Wrap all children that do not fulfill the predicate in error nodes. - pub fn filter_children(&self, p: &mut Parser, f: F, error: G) + pub fn filter_children(&self, p: &mut Parser, f: F) where - F: Fn(&Green) -> bool, - G: Fn(&NodeKind) -> (ErrorPosition, EcoString), + F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, { - p.filter_children(self, f, error) + p.filter_children(self, f) } /// Insert an error message that `what` was expected at the marker position. @@ -97,6 +93,20 @@ impl Marker { .into(), ); } + + /// Return a reference to the child after the marker. + pub fn child_at<'a>(&self, p: &'a Parser) -> Option<&'a Green> { + p.children.get(self.0) + } + + pub fn perform(&self, p: &mut Parser, kind: NodeKind, f: F) -> T + where + F: FnOnce(&mut Parser) -> T, + { + let success = f(p); + self.end(p, kind); + success + } } impl<'s> Parser<'s> { @@ -121,58 +131,31 @@ impl<'s> Parser<'s> { /// /// Each start call has to be matched with a call to `end`, /// `end_with_custom_children`, `lift`, `abort`, or `end_or_abort`. - pub fn start(&mut self) { + fn start(&mut self) { self.stack.push(std::mem::take(&mut self.children)); } /// Filter the last children using the given predicate. - fn filter_children(&mut self, count: &Marker, f: F, error: G) + fn filter_children(&mut self, count: &Marker, f: F) where - F: Fn(&Green) -> bool, - G: Fn(&NodeKind) -> (ErrorPosition, EcoString), + F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, { for child in &mut self.children[count.0 ..] 
{ if !((self.tokens.mode() != TokenMode::Code || Self::skip_type_ext(child.kind(), false)) - || child.kind().is_error() - || f(&child)) + || child.kind().is_error()) { - let (pos, msg) = error(child.kind()); - let inner = std::mem::take(child); - *child = - GreenNode::with_child(NodeKind::Error(pos, msg), inner.len(), inner) - .into(); + if let Err((pos, msg)) = f(child) { + let inner = std::mem::take(child); + *child = + GreenNode::with_child(NodeKind::Error(pos, msg), inner).into(); + } } } } - /// Return the a child from the current stack frame specified by its - /// non-trivia index from the back. - pub fn child(&self, child: usize) -> Option<&Green> { - self.node_index_from_back(child).map(|i| &self.children[i]) - } - - /// Map a non-trivia index from the back of the current stack frame to a - /// normal index. - fn node_index_from_back(&self, child: usize) -> Option { - let len = self.children.len(); - let code = self.tokens.mode() == TokenMode::Code; - let mut seen = 0; - for x in (0 .. len).rev() { - if self.skip_type(self.children[x].kind()) && code { - continue; - } - if seen == child { - return Some(x); - } - seen += 1; - } - - None - } - /// End the current node as a node of given `kind`. 
- pub fn end(&mut self, kind: NodeKind) { + fn end(&mut self, kind: NodeKind) { let outer = self.stack.pop().unwrap(); let mut children = std::mem::replace(&mut self.children, outer); @@ -191,15 +174,13 @@ impl<'s> Parser<'s> { remains.reverse(); } - let len = children.iter().map(|c| c.len()).sum(); - self.children - .push(GreenNode::with_children(kind, len, children).into()); + self.children.push(GreenNode::with_children(kind, children).into()); self.children.extend(remains); } - pub fn perform(&mut self, kind: NodeKind, f: F) -> ParseResult + pub fn perform(&mut self, kind: NodeKind, f: F) -> T where - F: FnOnce(&mut Self) -> ParseResult, + F: FnOnce(&mut Self) -> T, { self.start(); let success = f(self); @@ -267,12 +248,12 @@ impl<'s> Parser<'s> { /// Consume the next token if it is the given one and produce an error if /// not. - pub fn eat_expect(&mut self, t: &NodeKind) -> bool { + pub fn eat_expect(&mut self, t: &NodeKind) -> ParseResult { let eaten = self.eat_if(t); if !eaten { self.expected_at(t.as_str()); } - eaten + if eaten { Ok(()) } else { Err(()) } } /// Consume the next token, debug-asserting that it is one of the given ones. @@ -368,10 +349,9 @@ impl<'s> Parser<'s> { /// End the parsing of a group. /// /// This panics if no group was started. - pub fn end_group(&mut self) -> ParseResult { + pub fn end_group(&mut self) { let prev_mode = self.tokens.mode(); let group = self.groups.pop().expect("no started group"); - let mut success = true; self.tokens.set_mode(group.prev_mode); self.repeek(); @@ -392,7 +372,6 @@ impl<'s> Parser<'s> { rescan = false; } else if required { self.push_error(format!("expected {}", end)); - success = false; } } @@ -415,8 +394,6 @@ impl<'s> Parser<'s> { self.next = self.tokens.next(); self.repeek(); } - - if success { Ok(()) } else { Err(()) } } /// Add an error that `what` was expected at the given span. 
@@ -436,12 +413,13 @@ impl<'s> Parser<'s> { pub fn expected(&mut self, what: &str) { match self.peek().cloned() { Some(found) => { - self.start(); - self.eat(); - self.end(NodeKind::Error( - ErrorPosition::Full, - format!("expected {}, found {}", what, found).into(), - )); + self.perform( + NodeKind::Error( + ErrorPosition::Full, + format!("expected {}, found {}", what, found).into(), + ), + Self::eat, + ); } None => self.expected_at(what), } @@ -451,12 +429,13 @@ impl<'s> Parser<'s> { pub fn unexpected(&mut self) { match self.peek().cloned() { Some(found) => { - self.start(); - self.eat(); - self.end(NodeKind::Error( - ErrorPosition::Full, - format!("unexpected {}", found).into(), - )); + self.perform( + NodeKind::Error( + ErrorPosition::Full, + format!("unexpected {}", found).into(), + ), + Self::eat, + ); } None => self.push_error("unexpected end of file"), } diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index b6f64c677..1198d6b1e 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -156,7 +156,11 @@ impl HeadingNode { /// The section depth (numer of equals signs). pub fn level(&self) -> u8 { - self.0.children().filter(|n| n.kind() == &NodeKind::Eq).count() as u8 + self.0 + .children() + .filter(|n| n.kind() == &NodeKind::Eq) + .count() + .min(u8::MAX.into()) as u8 } } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index db3b0c9ab..363cbe6e9 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -98,15 +98,20 @@ pub struct GreenNode { impl GreenNode { /// Creates a new node with the given kind and children. 
- pub fn with_children(kind: NodeKind, len: usize, children: Vec) -> Self { - let mut data = GreenData::new(kind, len); - data.erroneous |= children.iter().any(|c| c.erroneous()); + pub fn with_children(kind: NodeKind, children: Vec) -> Self { + let mut data = GreenData::new(kind, 0); + let len = children + .iter() + .inspect(|c| data.erroneous |= c.erroneous()) + .map(Green::len) + .sum(); + data.len = len; Self { data, children } } /// Creates a new node with the given kind and a single child. - pub fn with_child(kind: NodeKind, len: usize, child: impl Into) -> Self { - Self::with_children(kind, len, vec![child.into()]) + pub fn with_child(kind: NodeKind, child: impl Into) -> Self { + Self::with_children(kind, vec![child.into()]) } /// The node's children. diff --git a/tests/ref/markup/heading.png b/tests/ref/markup/heading.png index ca52644bb0790b91907d0b0ebb266fce1d67675b..c33da4206beb2755941d314833f67db84dcbe501 100644 GIT binary patch literal 6406 zcmai(2QZvn+sE%^aqGK5^s=lLqW9=^1&JpTiC&lJo#<^5eXSm%ghwJu{s3nloi@c=(r*wVp!AM&o(@Iv zg!H;GTQt`=6fZBwxPDEgDw)1WYRV z&mp3hSK;DYody6Ky>J>}*Ad#1M0MB#s)h$Zamz}N7HxFyLy%v_;i!Y^m52}wj2%-u zqtNvfh*~tnIhf`2J@7`GnIefdgroMBHhJGNs&{+vJ$Ug+NSO%O3h2uLuDDpyK5zO` za_{+G_#opq}VR4S$EIM>N{4 z>v?jNW^u21W9a~nSY4wuz?P4yS5KwagAZjz|E!JrcWL@sYAIR)d+V>jL|e%G=K!mq zB(GqGoIm&2X`))!nyU!kggK#W2OQ=C@ zd68pWPg9M%MPt1BDEE>eM{DcRXx!olis#!Rx$Csi$Uoj6DHL7h06<)Ac%g0N=b_ou z9j7($AgONrxT6wQowg(XR$2FU9{;!{seVeef!ucu*&X?VZTTc)k{`r1 z?ChZZYell3*j62;3hol|vQDla5k=q~+uuOb9M*0*ei$ z-drXv7K3r%BdoN5-2pG1UxA1q!5$o|ZNKK?hCCrB6gfAJ-WJ8M0pZVSPt{;2!h1}C z(Z&Xj6A3|cX^5`-%!HiWev)@~chadN;Tk_xSjGJ)KgyNX`r|ymesm?)gq~jp zpw&cxmHbD++JNF+<3_i4jFPtoUaH;swI7l*mI(E@#?cut_)~hIgf*R_EcX< z-DU@ur$)gMP4Ct3wSMJm%^AoME4|UcWg=etN3{FK!S{9yEVhs|{NZMB3GYqB_zzE5 zSaVL+R3_Y#O2&vp!8D4G!q%4F(Ss~Y9#kM^97QEhI=xQ^!@zh@tNQGIZ@Pw0+UqB>$6er;#A1 zx2e>9zVWcslS?tx5FTniqw6)ryAWW=(09mD5Wo3SO*!mWS$)T~p(icZ6w2sH*|uoy zy#Bu*KOUn#?Qx}cUlzKX-hWG^ZnZYcDvMq5RZ*%sUgJ_ALVW+BM9^dY)q#wNN2rMa 
zW(^)iUe@R}=1?u$A?L?mOL!@6`#_n*!PjLQ4qzKg#CaABI za$s<+JQS}ZX~fUw#FF2Yj(SGKXu?PsFN+7p!}PvLx_=yczLch7O;t*10v##*Yww35 zFmCQyHiK%S6t|82Ko>sTrBD49-|Jn*;^Y!)^dl z9X`;4gfF`pOAi$)LZ1^U@&PK~#u?XzYDhPxN(}zAbNr*dB4bBZlI|QT6c-uw(Qg_Q zBLhtM7iiJ6CAjde3|fHWB`F|t@Upw`&Nl^3Ql9+o+XBzK6Eg-`eIM?!f8LR3XbB3~ zj#vXnspZ8?D8F?+fi>#t$z6&mmuGc`{H7+Gs#YZe#0#+ zR|r>B>~o84GV`>Cl+v5by{kX#`@}Wkz4_mxMmyrl=zcFSx@o-)%PJ~2 z+S+FsaG|OjfQ-o)d5VCyXo!~pp+Tu^x2It$Feh!Q>n>)Ofi!$9d;E0AlqYScWh5Uj z6OSdgJ@fUI&LlDG=Co^C^DL`?mH@3IO9aa4;SQe6ReZH~93F3aot=VVN6RC{1rt^8 zv#BoZ=aBLuSs&2ma@EFnUo#54s;YN{h;82~j>zJ)c&YryXL4#l+xyn0}M z@+#4k#rH-|>FiIXuc8^VU(FtQNxyQknp+8HT2Pwe7+G65R$n#DiGSp^pZHF(^|#sD z)BLxTa`Kb_GCNs~D=@l7w7hX#)pZ7C?lz$sxgG@HzxCgo#)S#7V*m*%qvt;#XwGT< z?KD~>X};#$STijMd<+e{gUR#OuSpYH6Gt49;t+J&*m8`R73z6V4wW_OK)|B> zZ+h5?550#bWyx=`w4nkBnHHvlE_!&XNvU@%Cv?ep#VxTaoVv58Jf8lU=1}d0+vv|P zio>q?rY}%sRP#4)-+?B2_73Uar|x>Vr2l*~RkVN`!E?ItZafI?Nm`j>7?mXID7008 zzWBB`9&HH`U6)G9)wx}@{)j_e?u}LQ0na@|Ra^fA(ujQR!K2T4Pqu3MTRmwJfuLv6 zvO&}vN_z33OI~F*o_afl@$^`tDZJ}>^O($-pt@3KVKU6P3S#np$X?)PC*#JY&f+M0 zfk`_ideR6=4f_SjqY?Dxiz7VZcekrYtc=FLW~PJ?fu+`ac*CLi#rbIbd|hV6H9J$>-FrL%4}C7 z=4vv$?_3%_We;EEK2d$;=S!RVEC1a7|K8hO=5Q3(W;W9p=U22D zHaU5Et=LLEZ)LlNQ&!hUIIR%%04o{S7q7%bHE~fuMxmWm+Kq zqX6c-5D5%2;6zitG{|9c5{c`;ZN3q=^%-;wNz%f*?+$C)He$4&6UK;RNc#Wvgu5Ob$4WwGg+wrzT67jA&WZc@OK4@ohw>l5WN zJ$^#n$!*Lvb}noC-5K7m#r0C_`jMcTD+4sT0vj%&)-T0f(AEMfbN_5Uw_#2g;l*C= z`zT?K6XjN^^G=1ae;OZ*9_Q)KUa7ESgyPRTW`0~uk-hL>%gpke+_ZHevGa9%niHjh zuRf0kQ5Y8SB@J}k4T3+~t6V#qV*Yc{FW(Jn?+oz8vO|?fd0h0HY1B8QSeF#?w;Cyb z)n-I^fP}6irON$J0m75=xU)~TX_8cHS6*{2JfqTQ?>ksOH`xx#2BmKlEm@vC+xK~F zQyurq%I-vj4|BMF-mLJ0x$>WmoJloun31OwQZddkqf7NEBNx zMp3@!9GE|7(TU67nuGFUQ<`;f{<)ir)$nge99wlq2b-eAhN{fHsc!+vdylgB*jB4T zZ;t0!wIllmHWNnH$VVmomug{yo* zbA=XXZ6R=HyvVxcE@!{!fefy_$rG7Nl*LCE&RQ5%t7z8T4}RS3>IK&fG1_<24?Vsg zlIrO}Y@qNU8KBx^D>dU49doPT^f=AfGtYN&OI}mH&w$GPOpbi2gfzfV5M|F)+MK4G zE>ivTCE7hB)-gK-CsvsY;{2y|LN?$e(osu=j9dF{sBf2|_Y(FND;PrXT|F&W>pG#? 
zX9vd-A+*n`)|wmcB`lQ&-M+kfHnFa^ix1T~ocoUsdZ7UPU({e; zf6{ip(d-{jrV)&tMq@HFXV;xxJTRVZIcP)F69Ohlgt#hiA*ygb5T6!K%12_G^9hTm?C%_A?;*bB*U~i6h1CEM93XGlb>67O>StR{81G&L=z!9J^GaTM24=daiX5{ne;xXVhLN-WZ%<1++ zYLyp~rk`1oEi{LlbvJEWLEqH}kN|Zj@s9L7F1`Cv1qcmjDG9dTp#akDMXF3z%C9(R zaV41+Mn1^m+1m%lFoJdD4X120?a5KwJk|I67hONGh{EvM0ma3BiI%*Wq+3}t{>+^i z)1?PR2PyLmlXpCGL8P-;k}ngDS}cs* znG)#=s1Ba=kegsmX!9@FDyAGgp4*ZmtO~9_oet|T+Fsa&;tOIKq0Lt!r0WLKN4c+_ zKN|k2md?Cm0a{%7Kl^r@0~)z0c0M(uY&2%lA?H6byKn#tJFC_($(`d=DXTyKPI#8i z``#-tg;$xmGs1Bu8*#Wk@Wn0}+zbCxy`{dN=6w@=GK0+K{8S>~YigP_L!9igff>Z;a!U-TCypgl>-RA=+;UNOa-ew9Jt#82mS^WKZ>gYw;|Ay6Iw>S?nLl=U4x^)bK6D6;d7~kK5-fl1pSZ;+E?}jqM-n9x2 zl&m*btu1z0dRkTf`N@-zVmiCwhS4E^d&=D9>tUt`e-k(Vw6dj=E;W1cEDk7Xx4xF4 zYcB%I)0XWq;Fc3AJ9s&AYB|jqs~D!$`^z(>3H*56+(!c^bRth5v&~bn*D8au5YzSn z%7vWU6+D*0n$#CX5@O(`WN?Ij)Omgn7rUbasf^25<`_M z6G`5j7KX^Ap6mMomA=WgXrhteHFq&ebyUy%&0h1O7@kiIdpllqCTelG4+rqYQuY_H zPd*jeR>0>C+O9_Pj7xm{r&k5 zU0`t4=&8!9o$3Z={maggt~oMoNB_($|1Qw&0+3J&>RUGtw_WZ!t)1ABr4!vpVw`Do z4%t8o9k4@jy}{^Eh-5YJn|bDTc`50Mf6&bGQ-=}IPZ+YYRtm1d7P7+*hT)b&veN}U zm+k>c4F~K+(cYX>IX#F-27|gF3{1c!R|_6rvt!#A!WpZ7Dnk;|t zm8FT}o`5cPr!wr}p)f}E4Z|~nKnSf`>^y%=fsnv_}~s%^EWBz?1mm z0)^!bW!Ax=ndYNt;epF418h0vlWqYI!A4T;cy0>?DSG-)$1k(+ezuubZCh#sX1xUI zuh&S3BEVBC-g{GbHENJV_X7P_2QNV+5pAoj5ykINF~_l=rr!P`9rp8c#xD#Ap0zf< zy`jEM(Q34XUd#c@4MsP2a=1`<&4~<^(Kdv)oHVguDdc#%0x{4zgtVeXrqaJ+(@L`l z*1pwo^FK3*e*+bVoKX1-`MF9#w}?bfO`$M08erWyZF(RnkBX?M1dV0WynXXD&dR9mq&wn-v)!4&9Ek?j3u)hYs|=XsGkHalxbB3wt6Z8awx&5oX+7o`V*3tOQ+e!I_7A=!T%E6RpkruaJ^A zcI&`t(X}|6X%~jQG#)js0m? 
zxM~tiYZ|vMV8-fz!IWp%r?~8&aei0*uN**`S>xi$;a`S;fIoqp&}7a$F|jQB`+`GX M=Z1Ebrftms0N=@Nk^lez literal 6611 zcmai32T&93x?Yk9QCfn4R7oI6ks?)!LI7z$RX}<`1O%i?lU`$^hM*uw2hm?T2xuUJ zfRX?P4AP~82uSb!a?Y81=H5H!|L@E@-_GuSv%B-X`@GNlJl|7eLv6-$T;~7)VAR#Q zVFCa&5C8zZfSz6eDkX>f0Dv=7_lBBT(D2geXh5M7>zRpB8h?QI1=9_>K)Ux}YV@xt zf3(tVB^a-9%pb;Q4^(tq#9OQVQWJz|d^df(Jt8iJsj16zpY18ojAs|Oe&_nFU^ez) zQ>kza|9_5_FYscw%tLQ&0BG=ofSa#ze)22;9gC@xB9%ep5N?CDWPVFvD0l^0bkg)Q zA_R-$!hT!knJfcEEj&lH6Urs$Cl&}QsYOjcf%TH6_z6Eid?yiBq*ZvXh%PDvH3V|i zVXEe7y^f`AeyXeu0|RX2^HFIiD-g%*`i0K-S_n=qr-(Ty7?AZ^rpXgJz^XAL^w@_K zl7)cT(MMH$AN;V^cJgb8>p+lJ;v*iKWtFBpN@2LW?(9{dD^c`7lIJ~|2zpS6 z(C633(avn{V8*CavA_)^T%92OheIjzrJ~_x^YVoF$i>?hbRP~={dhy;!~h1W9M?>T zj@^g`Qb%eYQBv;~LcV#XgHP3{7Kxv5&Kz6t7Z(SlmzSAwXbD z&F;!pnh6<9mNB1eA$X2*gUCL33xzX#(|Q-;;+c)t#B+HWqfrMzJwl52>VPPOQL~|93pL2Ki_U2XI!x|bu5qdPnGxEROMZaU{Mb$TijpQ!fOl!tP#r>v&2g5TSFRx zk^|H7;zyKQaC%>pt`@o!>$K=l+^iJGTT}Bk#^9*r6xO>><*uUBrWWyEgNzefrY;^E z!tVjsHO}|iXvWFSq0jq$^u$#jghS{j8aLz}4Cy%Tgp?$H8dL12C2z2!j{%3M?|DT2 zqS#9PwdX;b*M~q>`U-W`9v|bc{3s6Dj)VCalLG7RX=VM{}CMt6*{$oY-*QGtbgPk8IevrSqknd12hMxfiE!Q!)@dk{Tf37l`mjU&(eQEq%h-41PcVweEA~5f6>+mch#)>Ogc;*3~2JWnZz7r83DKN;vJF7DFIiHCMV#0#UyjRz{#~rvz`W-2l(|5<2j51X8Ja)iR*O`e-W!A8E~~8zr{-ZoH4(Z@ zMugT6Qdm{Y&SNJXM@pBB?#VE5zX3X%>J{L94*6Yy?aeifew-7@#+M`mZj^w{bXATg z_VoYxu6sW?vH6nUOx7)@)b`%+V(s&!OO%n1HxyLe#afg3aj^!@LH*tC5WbUY@ZBU1 z-5mOAGDw$wP7ZWbLmND57QivZ%#_x}CEbj7Zal|T3LjLtEJzT6eAsWr8(bW0?UaO; z!a3H@H*rte*Plg|b(t>dK925tTGVB*)DLZ~SDL}Gak~?LIvyV_NGcI@r&0~3k3kuXp zQ5WXmN2=`VViJi%X|(QQCfPf*XsIuH*zb({E=gMrwaHYkBgL4alY#lGM<#z$tR_D| z|4CB+Vyi~`cO#{LnWLBLFN)oUXWkRXpGK7#Ms3{0Dj4pKCt6f$Jt_Qn;(zAQ4f5!) 
zUZ5=_IzkyF9`(EN0fc&D@27fQU5Byd_JgEVb1-^K3-5>-NcZ;%z~t&u?frngzHv>c zVpQ6J7SyEh=0n-n8ewToXL@ivPQf8veh+)vbm7~HJ}G_E36?^@n%b^3#SNLRiuf9E zx=Iw%Aq7qgxc>M7M)SQseDk81NWyE%Q>M?NgXrnm&echkB ztWfvonO$+HG)$j|{*gG)DcVoVg{FOY|9*L1A+qglazs@NLo?Qld$8mA{WWji-wTNJ zNZ)ePruV=V(r^1%TTZfjs4zt260g&iya6Ujlqr@9BAI$VLH#P8FEbpVg3zPnASCLP zJ{MVlg?*vd1;?T-|AWOevM@fu5~U;kMC1=jI9*oE;o2+u&!6dpNwU7$8i=9xSQYZk znlsd_=(VmVkq4!^o{v;t5(R!-9vjU*Z**>cnNn0=RdCh*?`TX7z8yXB+HH8Do-+Or z-H>#Wl68ET(Q-g`W`lT zs`rvqa%B(Zx)HYCt#mbJUh+~!G7`Ss46|nz8TB0*~y1&%M07?iWBuK>)-8uX&Jkj0Ir&&~sOuiY}KXZ@IwwgWdwMC4{Xqe)|E-VMb8hax!;+1RsC zZCA8q36dTIa-oJB){Hbr_ z%F={ISW*0W0tPVA*COscAyYU!wx4tp8)vGPxM^nac6O@tRTkJ;y~|r_N}u8%^^$}H z|J2u3LvUtp^4C!He{6?85))54AI&r*1#{xModJE}=boU*Ol|yT?OxV+=U1Xsae77f zp=+7VifGz(Bv0-CM~oL?r+;Ho%xYikWJq#kc>uTyB4l+UZRN*Dy`Jop9f)HdevCs0DgtH_i9+ncYK(YqLExG9 zPF=h~v@#@y!7>4vGaY*vPDDk}*z(;HVA6iW#ExcnY>h+{HKS8_+Yda9CFT!5K7F)sJqq|%LtXndE2}%kLQIyp8 z5M35{R$((Q9?ljmUtBlk)3)~BrkchOu9NoZy87UkUZcqPN>WWu6u!&(+Q#SV(9lg6 zU~3ExuwHtuh++R3BL|HEl0()D3#Z^vy8B$3< z37+~<4(+fX9#P}}z2Gm){lBQ%MiX(gS+!N1QN*mQ2JJ z+d2N&z|RGgnjdG*4FDR|Ub_8DVQO0)@3qjNJVoyz7RClOg$@ z9_Q)2hy_*hm%)m24{L=Oq~7)n7AG+lBA=Rm8|F`1$;jRR6gEIS=tlP3(T6gnIgwCd z&feZ5?N>ugTuFQScun2vPx{oeAWk|_abG+mh+Hs?QWx%k!MTbwoF-3ZfHglFCkyBzT?og5z{^k|+63VnUfeO-dV#K43K8wSt7mG9_L6 zyst#csQkwq!}W{76kF@<JZyl``DHnMK7>pQCQnMJ+k`I)z3t}5IqDZM30KtW zOP{EK#5>aU?1HMb`-rUIg^Wq;f_3c!_>|7--U9>h`b`M6(M1qbAZvCZJl7F=N=jQP5owx}CYXQeHV{cH`<}jhB zqsZrhAEaHsqKsbLCzz!r^p^7?)Syx7qyko#X4jBB5oZD;t>O!R&-_G*x3KLAn!gw@ zSuUqs$V<{-rChlE+^*ziU^PQuKh@n$Y3~;MXGRgCy0n>)* zzaZ^;yw*IkNy~zE=alMD>=JUq?m?gPY34qz8pcw0b?jrAzowmzBNis~8?}L*%P3|1i9Zf?gw2 z{oSr#0zFm_>@SDw(7q&3t-XPwbX5mwJG&_{XuKca!G!Ad(1oTOPo9TE5MVVi3if7T z)cPCf4BG$#x+oLPz@}78yZ**SeL-$OQZ7oCFetA62AJ8>6MV_bf+T&OjHp*U9fe1K z()FoXN3!k^zQ(W~^oMHX)$|W<(NnG6N2ju7v-{RVi;xkV8oqU3IC{NiN?eF5|3}NM z>SHeopRCYN)s-)9XEcqk{^mXAZE4*1o$!+CKF(~5Xy1|G)6}W&@2Y{B0Iq|aEj8T` zLUL(?6l9yT>u?0FVv>~9?3Q`SNV-j-;|GSITAi@5xHan;;F8~9N2uRCvKl6lwX#mk 
z08!mKpJh9zrQQ9d*;zRcMzxPR3F1y29JW;NaD)0EkmxK^4#CLhst3LTHM;{&OSZ+& zxJB_ja0;fpp2@(+Y8W`hlDU^2z=sm(mHzgc# zS34>#MH&M!gVHUWfReQBeeNIOf}DgwI$Hs;V-gr>#Lc)D)c`(>GbO)qwP;T^WNj=P zPB&Q!993Pu@tx6HKqpza+zvor9H^^SH11@Yy^viWhfoEgQfe%wvex&eE~*PdqW$V@ zZKiTazwKZopQI{QN$8qx-tN!UJS88Mv+DbBbQ%{)-Z}iY=kbR9SFYWgkf(*DSag-d z>jyeX{V!B>*abiKaX4lUQpsA?+#l{TR4cWc_a`~mx%7r84dr|5Pe*c1lKw`~{?nF= zI5&B0jCCet2O49U88uyx`#cL05n-+XfnkM7b5|L5>|)$89(^UZrkA~)D~vscNmF=s zdH3w{s|ejz5wbBBHCJA~IMTMA55H5Ve2M5R6z(!7!U`?)a96jYdc^-!oVUBifBR3TCE_jE9Aup;^p_3Wl~}{JD%sL?@}Xro78YQY#m{>u)5LE#+kB_NBa1|w12mLVahGa?1*2mbt z@SOkK>IxH5AJqIl!6z_ipg*?N3f<6NDZ*(Sjf_C>fi7hoE^v;1(MHSy58!s*`hG&6(2C zUQ_1W8&@7liwo;!s#79zY7}B>1l^J_yj{{M7YxLBG|u_HT>k}~dD6ICEC=GrPB4lq z6lnXj>Qw$~JPxME*Z#^55u`>+NY= zw8XgH%3i8V3ogGLmH8fhn|_8wTMXlN+gZ;zl;L}YL2!m}+CD?gWuS=k3CxV!f5y*b z3B-=JYTW)VdF$^*@af%%bNLmgL_PqQ=r>tNUw+e?`Ui5lXn3ii;lr}{>aH$UHa%$X z*sD%ubYeF($PQm5j<#6z2>rB~`du7$&|~xcFfx*r!|Ho3%Au zY2HO(_|i;;`FNuRqcRrvD;0c`W#y-D!tVlB*ZAJjLH!u>28ZOcYJlIc1BjOJ3`XW3+xJtSt-xGEu|k`fwt{RG2ZA9X%F+v8Y%C)8t+G!r>RA zFB7Sc^{t}%)H~?Mb}zX*oh0+?c;c$L?ij&M#Ag9Y($R%Bv# zMBwRnKMDxPfz!J!4+!WmR);>eF7quKV8M{<>#UV83VXagk=d709$2~M5Z4sfEK0f0 z()s=^bLWzg_L9)<=N*VcK)7WNo%T^k;wQzqnO~$KMWHf#eSf|Cp#nV>KiK}-MbAmh i`}ZD-W+wsIWtMOs<-CAc3;4%wjjpEQjdJzdG5-ZZ$Pt(T diff --git a/tests/typ/markup/heading.typ b/tests/typ/markup/heading.typ index 4647e7a6e..cb0226173 100644 --- a/tests/typ/markup/heading.typ +++ b/tests/typ/markup/heading.typ @@ -8,8 +8,8 @@ === Level 2 ====== Level 6 -// Too many hashtags. -======= Level 7 +// At some point, it should stop shrinking. +=========== Level 11 --- // Heading vs. no heading. 
From 515fe89c5ea94e6bcdcfe387d006776d31ad3646 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Fri, 5 Nov 2021 13:21:39 +0100 Subject: [PATCH 13/18] Style changes Co-Authored-By: Martin --- src/parse/mod.rs | 128 +++++++++++----------------- src/parse/parser.rs | 172 +++++++++++++++++--------------------- tests/typ/code/import.typ | 1 - 3 files changed, 127 insertions(+), 174 deletions(-) diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 21ca303ed..90be73f9c 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -15,8 +15,6 @@ use std::rc::Rc; use crate::syntax::ast::{Associativity, BinOp, UnOp}; use crate::syntax::{ErrorPosition, GreenNode, NodeKind}; -type ParseResult = Result; - /// Parse a source file. pub fn parse(source: &str) -> Rc { let mut p = Parser::new(source); @@ -53,29 +51,34 @@ where { p.perform(NodeKind::Markup, |p| { while !p.eof() && f(p) { - markup_node(p, &mut at_start).ok(); + markup_node(p, &mut at_start); } }); } /// Parse a markup node. -fn markup_node(p: &mut Parser, at_start: &mut bool) -> ParseResult { +fn markup_node(p: &mut Parser, at_start: &mut bool) { let token = match p.peek() { Some(t) => t, - None => return Ok(()), + None => return, }; match token { // Whitespace. NodeKind::Space(newlines) => { *at_start |= *newlines > 0; - if *newlines < 2 { p.eat(); } else { p.convert(NodeKind::Parbreak); } - return Ok(()); + return; + } + + // Comments. + NodeKind::LineComment | NodeKind::BlockComment => { + p.eat(); + return; } // Text and markup. 
@@ -112,7 +115,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) -> ParseResult { let group = if stmt { Group::Stmt } else { Group::Expr }; p.start_group(group, TokenMode::Code); - let res = expr_with(p, true, 0); + let res = expr_prec(p, true, 0); if stmt && res.is_ok() && !p.eof() { p.expected_at("semicolon or line break"); } @@ -123,33 +126,18 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) -> ParseResult { NodeKind::LeftBrace => block(p), NodeKind::LeftBracket => template(p), - // Comments. - NodeKind::LineComment | NodeKind::BlockComment => { - p.eat(); - return Ok(()); - } - - NodeKind::Error(_, _) => { - p.eat(); - } - - _ => { - p.unexpected(); - return Err(()); - } + NodeKind::Error(_, _) => p.eat(), + _ => p.unexpected(), }; *at_start = false; - Ok(()) } /// Parse a heading. fn heading(p: &mut Parser) { p.perform(NodeKind::Heading, |p| { p.eat_assert(&NodeKind::Eq); - while p.eat_if(&NodeKind::Eq) {} - let column = p.column(p.prev_end()); markup_indented(p, column); }); @@ -175,7 +163,7 @@ fn enum_node(p: &mut Parser) { /// Parse an expression. fn expr(p: &mut Parser) -> ParseResult { - expr_with(p, false, 0) + expr_prec(p, false, 0) } /// Parse an expression with operators having at least the minimum precedence. @@ -185,20 +173,17 @@ fn expr(p: &mut Parser) -> ParseResult { /// in markup. /// /// Stops parsing at operations with lower precedence than `min_prec`, -fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { +fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { let marker = p.marker(); // Start the unary expression. 
match p.eat_map(|x| UnOp::from_token(&x)) { Some(op) => { let prec = op.precedence(); - expr_with(p, atomic, prec)?; - + expr_prec(p, atomic, prec)?; marker.end(p, NodeKind::Unary); } - None => { - primary(p, atomic)?; - } + None => primary(p, atomic)?, }; loop { @@ -213,7 +198,7 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { } if atomic { - break Ok(()); + break; } if p.peek() == Some(&NodeKind::With) { @@ -222,14 +207,12 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { let op = match p.peek().and_then(BinOp::from_token) { Some(binop) => binop, - None => { - break Ok(()); - } + None => break, }; let mut prec = op.precedence(); if prec < min_prec { - break Ok(()); + break; } p.eat(); @@ -239,8 +222,10 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { Associativity::Right => {} } - marker.perform(p, NodeKind::Binary, |p| expr_with(p, atomic, prec))?; + marker.perform(p, NodeKind::Binary, |p| expr_prec(p, atomic, prec))?; } + + Ok(()) } /// Parse a primary expression. @@ -260,7 +245,6 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { if !atomic && p.peek() == Some(&NodeKind::Arrow) { marker.end(p, NodeKind::ClosureParams); p.eat(); - marker.perform(p, NodeKind::Closure, expr) } else { Ok(()) @@ -288,7 +272,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { Some(NodeKind::Error(_, _)) => { p.eat(); - Ok(()) + Err(()) } // Nothing. @@ -330,6 +314,7 @@ fn literal(p: &mut Parser) -> bool { /// - Parameter list of closure expression fn parenthesized(p: &mut Parser) -> ParseResult { let marker = p.marker(); + p.start_group(Group::Paren, TokenMode::Code); let colon = p.eat_if(&NodeKind::Colon); let kind = collection(p).0; @@ -337,28 +322,26 @@ fn parenthesized(p: &mut Parser) -> ParseResult { // Leading colon makes this a (empty) dictionary. 
if colon { - return dict(p, &marker); + dict(p, &marker); + return Ok(()); } // Arrow means this is a closure's parameter list. if p.peek() == Some(&NodeKind::Arrow) { params(p, &marker, true); marker.end(p, NodeKind::ClosureParams); - p.eat_assert(&NodeKind::Arrow); - return marker.perform(p, NodeKind::Closure, expr); } // Find out which kind of collection this is. match kind { - CollectionKind::Group => { - marker.end(p, NodeKind::Group); - Ok(()) - } + CollectionKind::Group => marker.end(p, NodeKind::Group), CollectionKind::Positional => array(p, &marker), CollectionKind::Named => dict(p, &marker), } + + Ok(()) } /// The type of a collection. @@ -380,17 +363,18 @@ enum CollectionKind { fn collection(p: &mut Parser) -> (CollectionKind, usize) { let mut items = 0; let mut kind = CollectionKind::Positional; - let mut has_comma = false; + let mut can_group = true; let mut missing_coma: Option = None; while !p.eof() { if let Ok(item_kind) = item(p) { if items == 0 && item_kind == NodeKind::Named { kind = CollectionKind::Named; + can_group = false; } if item_kind == NodeKind::Spread { - has_comma = true; + can_group = false; } items += 1; @@ -404,14 +388,14 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { } if p.eat_if(&NodeKind::Comma) { - has_comma = true; + can_group = false; } else { missing_coma = Some(p.marker()); } } } - if !has_comma && items == 1 && kind == CollectionKind::Positional { + if can_group && items == 1 { kind = CollectionKind::Group; } @@ -422,23 +406,19 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { fn item(p: &mut Parser) -> ParseResult { let marker = p.marker(); if p.eat_if(&NodeKind::Dots) { - return marker - .perform(p, NodeKind::Spread, |p| expr(p).map(|_| NodeKind::Spread)); + marker.perform(p, NodeKind::Spread, expr)?; + return Ok(NodeKind::Spread); } - let ident_marker = p.marker(); expr(p)?; if p.peek() == Some(&NodeKind::Colon) { marker.perform(p, NodeKind::Named, |p| { - if matches!( - 
ident_marker.child_at(p).unwrap().kind(), - &NodeKind::Ident(_) - ) { + if matches!(marker.child_at(p).unwrap().kind(), &NodeKind::Ident(_)) { p.eat(); - expr(p).map(|_| NodeKind::Named) + expr(p) } else { - ident_marker.end( + marker.end( p, NodeKind::Error(ErrorPosition::Full, "expected identifier".into()), ); @@ -447,7 +427,8 @@ fn item(p: &mut Parser) -> ParseResult { expr(p).ok(); Err(()) } - }) + })?; + Ok(NodeKind::Named) } else { Ok(p.last_child().unwrap().kind().clone()) } @@ -455,7 +436,7 @@ fn item(p: &mut Parser) -> ParseResult { /// Convert a collection into an array, producing errors for anything other than /// expressions. -fn array(p: &mut Parser, marker: &Marker) -> ParseResult { +fn array(p: &mut Parser, marker: &Marker) { marker.filter_children(p, |x| match x.kind() { NodeKind::Named => Err(( ErrorPosition::Full, @@ -466,14 +447,12 @@ fn array(p: &mut Parser, marker: &Marker) -> ParseResult { } _ => Ok(()), }); - marker.end(p, NodeKind::Array); - Ok(()) } /// Convert a collection into a dictionary, producing errors for anything other /// than named pairs. -fn dict(p: &mut Parser, marker: &Marker) -> ParseResult { +fn dict(p: &mut Parser, marker: &Marker) { marker.filter_children(p, |x| match x.kind() { NodeKind::Named | NodeKind::Comma | NodeKind::Colon => Ok(()), NodeKind::Spread => { @@ -485,9 +464,7 @@ fn dict(p: &mut Parser, marker: &Marker) -> ParseResult { "expected named pair, found expression".into(), )), }); - marker.end(p, NodeKind::Dict); - Ok(()) } /// Convert a collection into a list of parameters, producing errors for @@ -591,7 +568,8 @@ fn let_expr(p: &mut Parser) -> ParseResult { with_expr(p, &marker)?; } else { // If a parenthesis follows, this is a function definition. 
- let has_params = if p.peek_direct() == Some(&NodeKind::LeftParen) { + let has_params = p.peek_direct() == Some(&NodeKind::LeftParen); + if has_params { p.perform(NodeKind::ClosureParams, |p| { p.start_group(Group::Paren, TokenMode::Code); let marker = p.marker(); @@ -599,10 +577,7 @@ fn let_expr(p: &mut Parser) -> ParseResult { params(p, &marker, true); p.end_group(); }); - true - } else { - false - }; + } if p.eat_if(&NodeKind::Eq) { expr(p)?; @@ -655,7 +630,6 @@ fn while_expr(p: &mut Parser) -> ParseResult { fn for_expr(p: &mut Parser) -> ParseResult { p.perform(NodeKind::ForExpr, |p| { p.eat_assert(&NodeKind::For); - for_pattern(p)?; p.eat_expect(&NodeKind::In)?; expr(p)?; @@ -668,8 +642,7 @@ fn for_expr(p: &mut Parser) -> ParseResult { fn for_pattern(p: &mut Parser) -> ParseResult { p.perform(NodeKind::ForPattern, |p| { ident(p)?; - if p.peek() == Some(&NodeKind::Comma) { - p.eat(); + if p.eat_if(&NodeKind::Comma) { ident(p)?; } Ok(()) @@ -699,9 +672,8 @@ fn import_expr(p: &mut Parser) -> ParseResult { }); }; - if p.eat_expect(&NodeKind::From).is_ok() { - expr(p)?; - } + p.eat_expect(&NodeKind::From)?; + expr(p)?; Ok(()) }) diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 3813ee840..4f181821a 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,10 +1,14 @@ use std::ops::Range; use std::rc::Rc; -use super::{ParseResult, TokenMode, Tokens}; +use super::{TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; +/// Allows parser methods to use the try operator. Not exposed as the parser +/// recovers from all errors. +pub(crate) type ParseResult = Result; + /// A convenient token-based parser. pub struct Parser<'s> { /// The parsed file. @@ -56,59 +60,6 @@ pub enum Group { Imports, } -/// A marker that indicates where a child may start. -pub struct Marker(usize); - -impl Marker { - /// Wraps all children in front of the marker. 
- pub fn end(&self, p: &mut Parser, kind: NodeKind) { - let stop_nl = p.stop_at_newline(); - let end = (self.0 .. p.children.len()) - .rev() - .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) - .unwrap_or(self.0) - + 1; - - let children: Vec<_> = p.children.drain(self.0 .. end).collect(); - p.children - .insert(self.0, GreenNode::with_children(kind, children).into()); - } - - /// Wrap all children that do not fulfill the predicate in error nodes. - pub fn filter_children(&self, p: &mut Parser, f: F) - where - F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, - { - p.filter_children(self, f) - } - - /// Insert an error message that `what` was expected at the marker position. - pub fn expected_at(&self, p: &mut Parser, what: &str) { - p.children.insert( - self.0, - GreenData::new( - NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), - 0, - ) - .into(), - ); - } - - /// Return a reference to the child after the marker. - pub fn child_at<'a>(&self, p: &'a Parser) -> Option<&'a Green> { - p.children.get(self.0) - } - - pub fn perform(&self, p: &mut Parser, kind: NodeKind, f: F) -> T - where - F: FnOnce(&mut Parser) -> T, - { - let success = f(p); - self.end(p, kind); - success - } -} - impl<'s> Parser<'s> { /// Create a new parser for the source string. pub fn new(src: &'s str) -> Self { @@ -127,40 +78,16 @@ impl<'s> Parser<'s> { } } - /// Start a nested node. - /// - /// Each start call has to be matched with a call to `end`, - /// `end_with_custom_children`, `lift`, `abort`, or `end_or_abort`. - fn start(&mut self) { - self.stack.push(std::mem::take(&mut self.children)); - } - - /// Filter the last children using the given predicate. - fn filter_children(&mut self, count: &Marker, f: F) + /// Perform a subparse that wraps its result in a node with the given kind. 
+ pub fn perform(&mut self, kind: NodeKind, f: F) -> T where - F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, + F: FnOnce(&mut Self) -> T, { - for child in &mut self.children[count.0 ..] { - if !((self.tokens.mode() != TokenMode::Code - || Self::skip_type_ext(child.kind(), false)) - || child.kind().is_error()) - { - if let Err((pos, msg)) = f(child) { - let inner = std::mem::take(child); - *child = - GreenNode::with_child(NodeKind::Error(pos, msg), inner).into(); - } - } - } - } + let prev = std::mem::take(&mut self.children); + let output = f(self); + let mut children = std::mem::replace(&mut self.children, prev); - /// End the current node as a node of given `kind`. - fn end(&mut self, kind: NodeKind) { - let outer = self.stack.pop().unwrap(); - let mut children = std::mem::replace(&mut self.children, outer); - - // have trailing whitespace continue to sit in self.children in code - // mode. + // Trailing trivia should not be wrapped into the new node. let mut remains = vec![]; if self.tokens.mode() == TokenMode::Code { let len = children.len(); @@ -176,16 +103,8 @@ impl<'s> Parser<'s> { self.children.push(GreenNode::with_children(kind, children).into()); self.children.extend(remains); - } - pub fn perform(&mut self, kind: NodeKind, f: F) -> T - where - F: FnOnce(&mut Self) -> T, - { - self.start(); - let success = f(self); - self.end(kind); - success + output } /// Eat and wrap the next token. @@ -332,7 +251,6 @@ impl<'s> Parser<'s> { /// This panics if the next token does not start the given group. pub fn start_group(&mut self, kind: Group, mode: TokenMode) { self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() }); - self.tokens.set_mode(mode); self.repeek(); @@ -534,3 +452,67 @@ impl<'s> Parser<'s> { Marker(self.children.len()) } } + +/// A marker that indicates where a child may start. +pub struct Marker(usize); + +impl Marker { + /// Wraps all children in front of the marker. 
+ pub fn end(&self, p: &mut Parser, kind: NodeKind) { + let stop_nl = p.stop_at_newline(); + let end = (self.0 .. p.children.len()) + .rev() + .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) + .unwrap_or(self.0) + + 1; + + let children: Vec<_> = p.children.drain(self.0 .. end).collect(); + p.children + .insert(self.0, GreenNode::with_children(kind, children).into()); + } + + /// Wrap all children that do not fulfill the predicate in error nodes. + pub fn filter_children(&self, p: &mut Parser, f: F) + where + F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, + { + for child in &mut p.children[self.0 ..] { + if !((p.tokens.mode() != TokenMode::Code + || Parser::skip_type_ext(child.kind(), false)) + || child.kind().is_error()) + { + if let Err((pos, msg)) = f(child) { + let inner = std::mem::take(child); + *child = + GreenNode::with_child(NodeKind::Error(pos, msg), inner).into(); + } + } + } + } + + /// Insert an error message that `what` was expected at the marker position. + pub fn expected_at(&self, p: &mut Parser, what: &str) { + p.children.insert( + self.0, + GreenData::new( + NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), + 0, + ) + .into(), + ); + } + + /// Return a reference to the child after the marker. + pub fn child_at<'a>(&self, p: &'a Parser) -> Option<&'a Green> { + p.children.get(self.0) + } + + pub fn perform(&self, p: &mut Parser, kind: NodeKind, f: F) -> T + where + F: FnOnce(&mut Parser) -> T, + { + let success = f(p); + self.end(p, kind); + success + } +} diff --git a/tests/typ/code/import.typ b/tests/typ/code/import.typ index 1fa8f2057..683bb52a3 100644 --- a/tests/typ/code/import.typ +++ b/tests/typ/code/import.typ @@ -114,5 +114,4 @@ This is never reached. // An item after a star. // Should output `, a from "target.typ"`. 
// Error: 10 expected keyword `from` -// Error: 10 expected semicolon or line break #import *, a from "target.typ" From 41bdafb5785dd85d20a3e79900b18e0010f6d71d Mon Sep 17 00:00:00 2001 From: Laurenz Date: Sat, 6 Nov 2021 12:12:02 +0100 Subject: [PATCH 14/18] Faster parser --- src/parse/mod.rs | 61 +-- src/parse/parser.rs | 888 ++++++++++++++++++++----------------------- src/parse/resolve.rs | 4 +- src/parse/tokens.rs | 14 +- src/syntax/mod.rs | 83 ++-- 5 files changed, 487 insertions(+), 563 deletions(-) diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 90be73f9c..aa616fdf8 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -13,13 +13,16 @@ pub use tokens::*; use std::rc::Rc; use crate::syntax::ast::{Associativity, BinOp, UnOp}; -use crate::syntax::{ErrorPosition, GreenNode, NodeKind}; +use crate::syntax::{ErrorPosition, Green, GreenNode, NodeKind}; /// Parse a source file. pub fn parse(source: &str) -> Rc { let mut p = Parser::new(source); markup(&mut p); - p.finish() + match p.finish().into_iter().next() { + Some(Green::Node(node)) => node, + _ => unreachable!(), + } } /// Parse markup. @@ -36,7 +39,7 @@ fn markup_indented(p: &mut Parser, column: usize) { }); markup_while(p, false, &mut |p| match p.peek() { - Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.next_end()) >= column, + Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.current_end()) >= column, _ => true, }) } @@ -114,7 +117,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { let stmt = matches!(token, NodeKind::Let | NodeKind::Import); let group = if stmt { Group::Stmt } else { Group::Expr }; - p.start_group(group, TokenMode::Code); + p.start_group(group); let res = expr_prec(p, true, 0); if stmt && res.is_ok() && !p.eof() { p.expected_at("semicolon or line break"); @@ -177,8 +180,9 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { let marker = p.marker(); // Start the unary expression. 
- match p.eat_map(|x| UnOp::from_token(&x)) { + match p.peek().and_then(UnOp::from_token) { Some(op) => { + p.eat(); let prec = op.precedence(); expr_prec(p, atomic, prec)?; marker.end(p, NodeKind::Unary); @@ -201,7 +205,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { break; } - if p.peek() == Some(&NodeKind::With) { + if p.at(&NodeKind::With) { with_expr(p, &marker)?; } @@ -242,7 +246,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { p.eat(); // Arrow means this is a closure's lone parameter. - if !atomic && p.peek() == Some(&NodeKind::Arrow) { + if !atomic && p.at(&NodeKind::Arrow) { marker.end(p, NodeKind::ClosureParams); p.eat(); marker.perform(p, NodeKind::Closure, expr) @@ -315,7 +319,7 @@ fn literal(p: &mut Parser) -> bool { fn parenthesized(p: &mut Parser) -> ParseResult { let marker = p.marker(); - p.start_group(Group::Paren, TokenMode::Code); + p.start_group(Group::Paren); let colon = p.eat_if(&NodeKind::Colon); let kind = collection(p).0; p.end_group(); @@ -327,14 +331,14 @@ fn parenthesized(p: &mut Parser) -> ParseResult { } // Arrow means this is a closure's parameter list. - if p.peek() == Some(&NodeKind::Arrow) { + if p.at(&NodeKind::Arrow) { params(p, &marker, true); marker.end(p, NodeKind::ClosureParams); p.eat_assert(&NodeKind::Arrow); return marker.perform(p, NodeKind::Closure, expr); } - // Find out which kind of collection this is. + // Transform into the identified collection. match kind { CollectionKind::Group => marker.end(p, NodeKind::Group), CollectionKind::Positional => array(p, &marker), @@ -402,7 +406,8 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { (kind, items) } -/// Parse an expression or a named pair. Returns if this is a named pair. +/// Parse an expression or a named pair, returning whether it's a spread or a +/// named pair. 
fn item(p: &mut Parser) -> ParseResult { let marker = p.marker(); if p.eat_if(&NodeKind::Dots) { @@ -412,25 +417,24 @@ fn item(p: &mut Parser) -> ParseResult { expr(p)?; - if p.peek() == Some(&NodeKind::Colon) { + if p.at(&NodeKind::Colon) { marker.perform(p, NodeKind::Named, |p| { if matches!(marker.child_at(p).unwrap().kind(), &NodeKind::Ident(_)) { p.eat(); expr(p) } else { - marker.end( - p, - NodeKind::Error(ErrorPosition::Full, "expected identifier".into()), - ); + let error = + NodeKind::Error(ErrorPosition::Full, "expected identifier".into()); + marker.end(p, error); p.eat(); - expr(p).ok(); Err(()) } })?; + Ok(NodeKind::Named) } else { - Ok(p.last_child().unwrap().kind().clone()) + Ok(NodeKind::None) } } @@ -488,7 +492,7 @@ fn params(p: &mut Parser, marker: &Marker, allow_parens: bool) { // Parse a template block: `[...]`. fn template(p: &mut Parser) { p.perform(NodeKind::Template, |p| { - p.start_group(Group::Bracket, TokenMode::Markup); + p.start_group(Group::Bracket); markup(p); p.end_group(); }); @@ -497,9 +501,9 @@ fn template(p: &mut Parser) { /// Parse a code block: `{...}`. fn block(p: &mut Parser) { p.perform(NodeKind::Block, |p| { - p.start_group(Group::Brace, TokenMode::Code); + p.start_group(Group::Brace); while !p.eof() { - p.start_group(Group::Stmt, TokenMode::Code); + p.start_group(Group::Stmt); if expr(p).is_ok() && !p.eof() { p.expected_at("semicolon or line break"); } @@ -515,7 +519,7 @@ fn block(p: &mut Parser) { /// Parse a function call. 
fn call(p: &mut Parser, callee: &Marker) -> ParseResult { callee.perform(p, NodeKind::Call, |p| match p.peek_direct() { - Some(NodeKind::LeftParen) | Some(NodeKind::LeftBracket) => { + Some(NodeKind::LeftParen | NodeKind::LeftBracket) => { args(p, true); Ok(()) } @@ -530,7 +534,7 @@ fn call(p: &mut Parser, callee: &Marker) -> ParseResult { fn args(p: &mut Parser, allow_template: bool) { p.perform(NodeKind::CallArgs, |p| { if !allow_template || p.peek_direct() == Some(&NodeKind::LeftParen) { - p.start_group(Group::Paren, TokenMode::Code); + p.start_group(Group::Paren); collection(p); p.end_group(); } @@ -546,7 +550,7 @@ fn with_expr(p: &mut Parser, marker: &Marker) -> ParseResult { marker.perform(p, NodeKind::WithExpr, |p| { p.eat_assert(&NodeKind::With); - if p.peek() == Some(&NodeKind::LeftParen) { + if p.at(&NodeKind::LeftParen) { args(p, false); Ok(()) } else { @@ -564,14 +568,14 @@ fn let_expr(p: &mut Parser) -> ParseResult { let marker = p.marker(); ident(p)?; - if p.peek() == Some(&NodeKind::With) { + if p.at(&NodeKind::With) { with_expr(p, &marker)?; } else { // If a parenthesis follows, this is a function definition. let has_params = p.peek_direct() == Some(&NodeKind::LeftParen); if has_params { p.perform(NodeKind::ClosureParams, |p| { - p.start_group(Group::Paren, TokenMode::Code); + p.start_group(Group::Paren); let marker = p.marker(); collection(p); params(p, &marker, true); @@ -605,7 +609,7 @@ fn if_expr(p: &mut Parser) -> ParseResult { body(p)?; if p.eat_if(&NodeKind::Else) { - if p.peek() == Some(&NodeKind::If) { + if p.at(&NodeKind::If) { if_expr(p)?; } else { body(p)?; @@ -657,7 +661,7 @@ fn import_expr(p: &mut Parser) -> ParseResult { if !p.eat_if(&NodeKind::Star) { // This is the list of identifiers scenario. 
p.perform(NodeKind::ImportItems, |p| { - p.start_group(Group::Imports, TokenMode::Code); + p.start_group(Group::Imports); let marker = p.marker(); let items = collection(p).1; if items == 0 { @@ -712,6 +716,5 @@ fn body(p: &mut Parser) -> ParseResult { return Err(()); } } - Ok(()) } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 4f181821a..5d26ff636 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,5 +1,4 @@ -use std::ops::Range; -use std::rc::Rc; +use std::mem; use super::{TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; @@ -11,27 +10,425 @@ pub(crate) type ParseResult = Result; /// A convenient token-based parser. pub struct Parser<'s> { - /// The parsed file. - src: &'s str, /// An iterator over the source tokens. tokens: Tokens<'s>, + /// Whether we are at the end of the file or of a group. + eof: bool, + /// The current token. + current: Option, + /// The end byte index of the last (non-whitespace if in code mode) token. + prev_end: usize, + /// The start byte index of the peeked token. + current_start: usize, /// The stack of open groups. groups: Vec, - /// The next token. - next: Option, - /// The peeked token. - /// (Same as `next` except if we are at the end of group, then `None`). - peeked: Option, - /// The end index of the last (non-whitespace if in code mode) token. - prev_end: usize, - /// The start index of the peeked token. - next_start: usize, - /// A stack of outer children vectors. - stack: Vec>, /// The children of the currently built node. children: Vec, } +impl<'s> Parser<'s> { + /// Create a new parser for the source string. + pub fn new(src: &'s str) -> Self { + let mut tokens = Tokens::new(src, TokenMode::Markup); + let current = tokens.next(); + Self { + tokens, + eof: current.is_none(), + current, + prev_end: 0, + current_start: 0, + groups: vec![], + children: vec![], + } + } + + /// End the parsing process and return the last child. 
+ pub fn finish(self) -> Vec { + self.children + } + + /// Create a new marker. + pub fn marker(&mut self) -> Marker { + Marker(self.children.len()) + } + + /// Perform a subparse that wraps its result in a node with the given kind. + pub fn perform(&mut self, kind: NodeKind, f: F) -> T + where + F: FnOnce(&mut Self) -> T, + { + let prev = mem::take(&mut self.children); + let output = f(self); + let mut children = mem::replace(&mut self.children, prev); + + // Trailing trivia should not be wrapped into the new node. + let mut remains = vec![]; + if self.tokens.mode() == TokenMode::Code { + let len = children.len(); + for n in (0 .. len).rev() { + if !self.is_trivia(&children[n].kind()) { + break; + } + + remains.push(children.pop().unwrap()); + } + remains.reverse(); + } + + self.children.push(GreenNode::with_children(kind, children).into()); + self.children.extend(remains); + + output + } + + /// Whether the end of the source string or group is reached. + pub fn eof(&self) -> bool { + self.eof + } + + /// Consume the current token and also trailing trivia if in code mode. + pub fn eat(&mut self) { + self.prev_end = self.tokens.index(); + self.bump(); + + if self.tokens.mode() == TokenMode::Code { + // Skip whitespace and comments. + while self.current.as_ref().map_or(false, |x| self.is_trivia(x)) { + self.bump(); + } + } + + self.repeek(); + } + + /// Eat if the current token it is the given one. + pub fn eat_if(&mut self, t: &NodeKind) -> bool { + let at = self.at(t); + if at { + self.eat(); + } + at + } + + /// Eat if the current token is the given one and produce an error if not. + pub fn eat_expect(&mut self, t: &NodeKind) -> ParseResult { + let eaten = self.eat_if(t); + if !eaten { + self.expected_at(t.as_str()); + } + if eaten { Ok(()) } else { Err(()) } + } + + /// Eat, debug-asserting that the token is the given one. 
+ pub fn eat_assert(&mut self, t: &NodeKind) { + debug_assert_eq!(self.peek(), Some(t)); + self.eat(); + } + + /// Eat tokens while the condition is true. + pub fn eat_while(&mut self, mut f: F) + where + F: FnMut(&NodeKind) -> bool, + { + while self.peek().map_or(false, |t| f(t)) { + self.eat(); + } + } + + /// Eat the current token, but change its type. + pub fn convert(&mut self, kind: NodeKind) { + let idx = self.children.len(); + self.eat(); + if let Some(child) = self.children.get_mut(idx) { + child.set_kind(kind); + } + } + + /// Whether the current token is of the given type. + pub fn at(&self, kind: &NodeKind) -> bool { + self.peek() == Some(kind) + } + + /// Peek at the current token without consuming it. + pub fn peek(&self) -> Option<&NodeKind> { + if self.eof { None } else { self.current.as_ref() } + } + + /// Peek at the current token, if it follows immediately after the last one + /// without any trivia in between. + pub fn peek_direct(&self) -> Option<&NodeKind> { + if self.prev_end() == self.current_start() { + self.peek() + } else { + None + } + } + + /// Peek at the source of the current token. + pub fn peek_src(&self) -> &'s str { + self.tokens.scanner().get(self.current_start() .. self.current_end()) + } + + /// The byte index at which the last token ended. + /// + /// Refers to the end of the last non-trivia token in code mode. + pub fn prev_end(&self) -> usize { + self.prev_end + } + + /// The byte index at which the current token starts. + pub fn current_start(&self) -> usize { + self.current_start + } + + /// The byte index at which the current token ends. + pub fn current_end(&self) -> usize { + self.tokens.index() + } + + /// Determine the column index for the given byte index. + pub fn column(&self, index: usize) -> usize { + self.tokens.scanner().column(index) + } + + /// Continue parsing in a group. + /// + /// When the end delimiter of the group is reached, all subsequent calls to + /// `peek()` return `None`. 
Parsing can only continue with a matching call + /// to `end_group`. + /// + /// This panics if the current token does not start the given group. + pub fn start_group(&mut self, kind: Group) { + self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() }); + self.tokens.set_mode(match kind { + Group::Bracket => TokenMode::Markup, + _ => TokenMode::Code, + }); + + self.repeek(); + match kind { + Group::Paren => self.eat_assert(&NodeKind::LeftParen), + Group::Bracket => self.eat_assert(&NodeKind::LeftBracket), + Group::Brace => self.eat_assert(&NodeKind::LeftBrace), + Group::Stmt => {} + Group::Expr => {} + Group::Imports => {} + } + } + + /// End the parsing of a group. + /// + /// This panics if no group was started. + pub fn end_group(&mut self) { + let group_mode = self.tokens.mode(); + let group = self.groups.pop().expect("no started group"); + self.tokens.set_mode(group.prev_mode); + self.repeek(); + + let mut rescan = self.tokens.mode() != group_mode; + + // Eat the end delimiter if there is one. + if let Some((end, required)) = match group.kind { + Group::Paren => Some((NodeKind::RightParen, true)), + Group::Bracket => Some((NodeKind::RightBracket, true)), + Group::Brace => Some((NodeKind::RightBrace, true)), + Group::Stmt => Some((NodeKind::Semicolon, false)), + Group::Expr => None, + Group::Imports => None, + } { + if self.current.as_ref() == Some(&end) { + // Bump the delimeter and return. No need to rescan in this case. + self.eat(); + rescan = false; + } else if required { + self.push_error(format!("expected {}", end)); + } + } + + // Rescan the peeked token if the mode changed. + if rescan { + if group_mode == TokenMode::Code { + let len = self.children.len(); + for n in (0 .. 
len).rev() { + if !self.is_trivia(self.children[n].kind()) { + break; + } + + self.children.pop(); + } + } + + self.tokens.jump(self.prev_end()); + self.prev_end = self.tokens.index(); + self.current_start = self.tokens.index(); + self.current = self.tokens.next(); + self.repeek(); + } + } + + /// Low-level bump that consumes exactly one token without special trivia + /// handling. + fn bump(&mut self) { + let kind = self.current.take().unwrap(); + let len = self.tokens.index() - self.current_start; + self.children.push(GreenData::new(kind, len).into()); + self.current_start = self.tokens.index(); + self.current = self.tokens.next(); + } + + /// Take another look at the current token to recheck whether it ends a + /// group. + fn repeek(&mut self) { + self.eof = match &self.current { + Some(NodeKind::RightParen) => self.inside(Group::Paren), + Some(NodeKind::RightBracket) => self.inside(Group::Bracket), + Some(NodeKind::RightBrace) => self.inside(Group::Brace), + Some(NodeKind::Semicolon) => self.inside(Group::Stmt), + Some(NodeKind::From) => self.inside(Group::Imports), + Some(NodeKind::Space(n)) => *n >= 1 && self.stop_at_newline(), + Some(_) => false, + None => true, + }; + } + + /// Returns whether the given type can be skipped over. + fn is_trivia(&self, token: &NodeKind) -> bool { + Self::is_trivia_ext(token, self.stop_at_newline()) + } + + /// Returns whether the given type can be skipped over given the current + /// newline mode. + fn is_trivia_ext(token: &NodeKind, stop_at_newline: bool) -> bool { + match token { + NodeKind::Space(n) => *n == 0 || !stop_at_newline, + NodeKind::LineComment => true, + NodeKind::BlockComment => true, + _ => false, + } + } + + /// Whether the active group must end at a newline. + fn stop_at_newline(&self) -> bool { + matches!( + self.groups.last().map(|group| group.kind), + Some(Group::Stmt | Group::Expr | Group::Imports) + ) + } + + /// Whether we are inside the given group. 
+ fn inside(&self, kind: Group) -> bool { + self.groups.iter().any(|g| g.kind == kind) + } +} + +/// Error handling. +impl Parser<'_> { + /// Push an error into the children list. + pub fn push_error(&mut self, msg: impl Into) { + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + self.children.push(GreenData::new(error, 0).into()); + } + + /// Eat the current token and add an error that it is unexpected. + pub fn unexpected(&mut self) { + match self.peek() { + Some(found) => { + let msg = format!("unexpected {}", found); + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + self.perform(error, Self::eat); + } + None => self.push_error("unexpected end of file"), + } + } + + /// Eat the current token and add an error that it is not the expected `thing`. + pub fn expected(&mut self, thing: &str) { + match self.peek() { + Some(found) => { + let msg = format!("expected {}, found {}", thing, found); + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + self.perform(error, Self::eat); + } + None => self.expected_at(thing), + } + } + + /// Add an error that the `thing` was expected at the end of the last + /// non-trivia token. + pub fn expected_at(&mut self, thing: &str) { + let mut found = self.children.len(); + for (i, node) in self.children.iter().enumerate().rev() { + if !self.is_trivia(node.kind()) { + break; + } + found = i; + } + + Marker(found).expected_at(self, thing); + } +} + +/// A marker that indicates where a node may start. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub struct Marker(usize); + +impl Marker { + /// Perform a subparse that wraps all children after the marker in a node + /// with the given kind. + pub fn perform(self, p: &mut Parser, kind: NodeKind, f: F) -> T + where + F: FnOnce(&mut Parser) -> T, + { + let success = f(p); + self.end(p, kind); + success + } + + /// Wrap all children after the marker in a node with the given `kind`. 
+ pub fn end(self, p: &mut Parser, kind: NodeKind) { + let end = (self.0 .. p.children.len()) + .rev() + .find(|&i| !p.is_trivia(p.children[i].kind())) + .unwrap_or(self.0) + + 1; + + let children: Vec<_> = p.children.drain(self.0 .. end).collect(); + p.children + .insert(self.0, GreenNode::with_children(kind, children).into()); + } + + /// Wrap all children that do not fulfill the predicate in error nodes. + pub fn filter_children(self, p: &mut Parser, f: F) + where + F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, + { + for child in &mut p.children[self.0 ..] { + if (p.tokens.mode() == TokenMode::Markup + || !Parser::is_trivia_ext(child.kind(), false)) + && !child.kind().is_error() + { + if let Err((pos, msg)) = f(child) { + let error = NodeKind::Error(pos, msg); + let inner = mem::take(child); + *child = GreenNode::with_child(error, inner).into(); + } + } + } + } + + /// Insert an error message that `what` was expected at the marker position. + pub fn expected_at(self, p: &mut Parser, what: &str) { + let msg = format!("expected {}", what); + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + p.children.insert(self.0, GreenData::new(error, 0).into()); + } + + /// Return a reference to the child directly after the marker. + pub fn child_at<'a>(self, p: &'a Parser) -> Option<&'a Green> { + p.children.get(self.0) + } +} + /// A logical group of tokens, e.g. `[...]`. struct GroupEntry { /// The kind of group this is. This decides which tokens will end the group. @@ -46,12 +443,12 @@ struct GroupEntry { /// A group, confined by optional start and end delimiters. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum Group { - /// A parenthesized group: `(...)`. - Paren, /// A bracketed group: `[...]`. Bracket, /// A curly-braced group: `{...}`. Brace, + /// A parenthesized group: `(...)`. + Paren, /// A group ended by a semicolon or a line break: `;`, `\n`. Stmt, /// A group for a single expression, ended by a line break. 
@@ -59,460 +456,3 @@ pub enum Group { /// A group for import items, ended by a semicolon, line break or `from`. Imports, } - -impl<'s> Parser<'s> { - /// Create a new parser for the source string. - pub fn new(src: &'s str) -> Self { - let mut tokens = Tokens::new(src, TokenMode::Markup); - let next = tokens.next(); - Self { - src, - tokens, - groups: vec![], - next: next.clone(), - peeked: next, - prev_end: 0, - next_start: 0, - stack: vec![], - children: vec![], - } - } - - /// Perform a subparse that wraps its result in a node with the given kind. - pub fn perform(&mut self, kind: NodeKind, f: F) -> T - where - F: FnOnce(&mut Self) -> T, - { - let prev = std::mem::take(&mut self.children); - let output = f(self); - let mut children = std::mem::replace(&mut self.children, prev); - - // Trailing trivia should not be wrapped into the new node. - let mut remains = vec![]; - if self.tokens.mode() == TokenMode::Code { - let len = children.len(); - for n in (0 .. len).rev() { - if !self.skip_type(&children[n].kind()) { - break; - } - - remains.push(children.pop().unwrap()); - } - remains.reverse(); - } - - self.children.push(GreenNode::with_children(kind, children).into()); - self.children.extend(remains); - - output - } - - /// Eat and wrap the next token. - pub fn convert(&mut self, kind: NodeKind) { - self.eat(); - self.children.last_mut().unwrap().set_kind(kind); - } - - /// End the current node and undo its existence, inling all accumulated - /// children into its parent. - pub fn lift(&mut self) { - let outer = self.stack.pop().unwrap(); - let children = std::mem::replace(&mut self.children, outer); - self.children.extend(children); - } - - /// Add an error to the current children list. - fn push_error(&mut self, msg: impl Into) { - self.children.push( - GreenData::new(NodeKind::Error(ErrorPosition::Full, msg.into().into()), 0) - .into(), - ); - } - - /// End the parsing process and return the last child. 
- pub fn finish(&mut self) -> Rc { - match self.children.pop().unwrap() { - Green::Node(n) => n, - _ => panic!(), - } - } - - /// Whether the end of the source string or group is reached. - pub fn eof(&self) -> bool { - self.peek().is_none() - } - - /// Consume the next token if it is the given one. - pub fn eat_if(&mut self, t: &NodeKind) -> bool { - if self.peek() == Some(t) { - self.eat(); - true - } else { - false - } - } - - /// Consume the next token if the closure maps it a to `Some`-variant. - pub fn eat_map(&mut self, f: F) -> Option - where - F: FnOnce(&NodeKind) -> Option, - { - let token = self.peek()?; - let mapped = f(token); - if mapped.is_some() { - self.eat(); - } - mapped - } - - /// Consume the next token if it is the given one and produce an error if - /// not. - pub fn eat_expect(&mut self, t: &NodeKind) -> ParseResult { - let eaten = self.eat_if(t); - if !eaten { - self.expected_at(t.as_str()); - } - if eaten { Ok(()) } else { Err(()) } - } - - /// Consume the next token, debug-asserting that it is one of the given ones. - pub fn eat_assert(&mut self, t: &NodeKind) { - let next = self.peek(); - debug_assert_eq!(next, Some(t)); - self.eat(); - } - - /// Consume tokens while the condition is true. - pub fn eat_while(&mut self, mut f: F) - where - F: FnMut(&NodeKind) -> bool, - { - while self.peek().map_or(false, |t| f(t)) { - self.eat(); - } - } - - /// Peek at the next token without consuming it. - pub fn peek(&self) -> Option<&NodeKind> { - self.peeked.as_ref() - } - - /// Peek at the next token if it follows immediately after the last one - /// without any whitespace in between. - pub fn peek_direct(&self) -> Option<&NodeKind> { - if self.next_start() == self.prev_end() { - self.peeked.as_ref() - } else { - None - } - } - - /// Peek at the source of the next token. - pub fn peek_src(&self) -> &'s str { - self.get(self.next_start() .. self.next_end()) - } - - /// The byte index at which the last token ended. 
- /// - /// Refers to the end of the last _non-whitespace_ token in code mode. - pub fn prev_end(&self) -> usize { - self.prev_end - } - - /// The byte index at which the next token starts. - pub fn next_start(&self) -> usize { - self.next_start - } - - /// The byte index at which the next token will end. - /// - /// Is the same as [`next_start()`][Self::next_start] if `peek()` returns - /// `None`. - pub fn next_end(&self) -> usize { - self.tokens.index() - } - - /// Determine the column index for the given byte index. - pub fn column(&self, index: usize) -> usize { - self.tokens.column(index) - } - - /// Slice out part of the source string. - pub fn get(&self, range: Range) -> &'s str { - self.src.get(range).unwrap() - } - - /// Continue parsing in a group. - /// - /// When the end delimiter of the group is reached, all subsequent calls to - /// `eat()` and `peek()` return `None`. Parsing can only continue with - /// a matching call to `end_group`. - /// - /// This panics if the next token does not start the given group. - pub fn start_group(&mut self, kind: Group, mode: TokenMode) { - self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() }); - self.tokens.set_mode(mode); - self.repeek(); - - match kind { - Group::Paren => self.eat_assert(&NodeKind::LeftParen), - Group::Bracket => self.eat_assert(&NodeKind::LeftBracket), - Group::Brace => self.eat_assert(&NodeKind::LeftBrace), - Group::Stmt => {} - Group::Expr => {} - Group::Imports => {} - } - } - - /// End the parsing of a group. - /// - /// This panics if no group was started. - pub fn end_group(&mut self) { - let prev_mode = self.tokens.mode(); - let group = self.groups.pop().expect("no started group"); - self.tokens.set_mode(group.prev_mode); - self.repeek(); - - let mut rescan = self.tokens.mode() != prev_mode; - - // Eat the end delimiter if there is one. 
- if let Some((end, required)) = match group.kind { - Group::Paren => Some((NodeKind::RightParen, true)), - Group::Bracket => Some((NodeKind::RightBracket, true)), - Group::Brace => Some((NodeKind::RightBrace, true)), - Group::Stmt => Some((NodeKind::Semicolon, false)), - Group::Expr => None, - Group::Imports => None, - } { - if self.next == Some(end.clone()) { - // Bump the delimeter and return. No need to rescan in this case. - self.eat(); - rescan = false; - } else if required { - self.push_error(format!("expected {}", end)); - } - } - - // Rescan the peeked token if the mode changed. - if rescan { - if prev_mode == TokenMode::Code { - let len = self.children.len(); - for n in (0 .. len).rev() { - if !self.skip_type(self.children[n].kind()) { - break; - } - - self.children.pop(); - } - } - - self.tokens.jump(self.prev_end()); - self.prev_end = self.tokens.index().into(); - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); - self.repeek(); - } - } - - /// Add an error that `what` was expected at the given span. - pub fn expected_at(&mut self, what: &str) { - let mut found = self.children.len(); - for (i, node) in self.children.iter().enumerate().rev() { - if !self.skip_type(node.kind()) { - break; - } - found = i; - } - - Marker(found).expected_at(self, what); - } - - /// Eat the next token and add an error that it is not the expected `thing`. - pub fn expected(&mut self, what: &str) { - match self.peek().cloned() { - Some(found) => { - self.perform( - NodeKind::Error( - ErrorPosition::Full, - format!("expected {}, found {}", what, found).into(), - ), - Self::eat, - ); - } - None => self.expected_at(what), - } - } - - /// Eat the next token and add an error that it is unexpected. 
- pub fn unexpected(&mut self) { - match self.peek().cloned() { - Some(found) => { - self.perform( - NodeKind::Error( - ErrorPosition::Full, - format!("unexpected {}", found).into(), - ), - Self::eat, - ); - } - None => self.push_error("unexpected end of file"), - } - } - - /// Returns whether the given type can be skipped over given the current - /// newline mode. - pub fn skip_type_ext(token: &NodeKind, stop_at_newline: bool) -> bool { - match token { - NodeKind::Space(n) => n < &1 || !stop_at_newline, - NodeKind::LineComment => true, - NodeKind::BlockComment => true, - _ => false, - } - } - - /// Returns whether the given type can be skipped over. - fn skip_type(&self, token: &NodeKind) -> bool { - Self::skip_type_ext(token, self.stop_at_newline()) - } - - /// Consume the next token. - pub fn eat(&mut self) { - self.children.push( - GreenData::new( - self.next.clone().unwrap(), - self.tokens.index() - self.next_start, - ) - .into(), - ); - - self.prev_end = self.tokens.index().into(); - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); - - if self.tokens.mode() == TokenMode::Code { - // Skip whitespace and comments. - while self.next.as_ref().map_or(false, |x| self.skip_type(x)) { - self.children.push( - GreenData::new( - self.next.clone().unwrap(), - self.tokens.index() - self.next_start, - ) - .into(), - ); - - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); - } - } - - self.repeek(); - } - - /// Take another look at the next token to recheck whether it ends a group. 
- fn repeek(&mut self) { - self.peeked = self.next.clone(); - let token = match self.next.as_ref() { - Some(token) => token, - None => return, - }; - - if match token { - NodeKind::RightParen => self.inside(Group::Paren), - NodeKind::RightBracket => self.inside(Group::Bracket), - NodeKind::RightBrace => self.inside(Group::Brace), - NodeKind::Semicolon => self.inside(Group::Stmt), - NodeKind::From => self.inside(Group::Imports), - NodeKind::Space(n) => n > &0 && self.stop_at_newline(), - _ => false, - } { - self.peeked = None; - } - } - - /// Whether the active group ends at a newline. - fn stop_at_newline(&self) -> bool { - matches!( - self.groups.last().map(|group| group.kind), - Some(Group::Stmt | Group::Expr | Group::Imports) - ) - } - - /// Whether we are inside the given group. - fn inside(&self, kind: Group) -> bool { - self.groups.iter().any(|g| g.kind == kind) - } - - /// Returns the last child of the current stack frame. - pub fn last_child(&self) -> Option<&Green> { - self.children.last() - } - - /// Create a new marker. - pub fn marker(&mut self) -> Marker { - Marker(self.children.len()) - } -} - -/// A marker that indicates where a child may start. -pub struct Marker(usize); - -impl Marker { - /// Wraps all children in front of the marker. - pub fn end(&self, p: &mut Parser, kind: NodeKind) { - let stop_nl = p.stop_at_newline(); - let end = (self.0 .. p.children.len()) - .rev() - .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) - .unwrap_or(self.0) - + 1; - - let children: Vec<_> = p.children.drain(self.0 .. end).collect(); - p.children - .insert(self.0, GreenNode::with_children(kind, children).into()); - } - - /// Wrap all children that do not fulfill the predicate in error nodes. - pub fn filter_children(&self, p: &mut Parser, f: F) - where - F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, - { - for child in &mut p.children[self.0 ..] 
{ - if !((p.tokens.mode() != TokenMode::Code - || Parser::skip_type_ext(child.kind(), false)) - || child.kind().is_error()) - { - if let Err((pos, msg)) = f(child) { - let inner = std::mem::take(child); - *child = - GreenNode::with_child(NodeKind::Error(pos, msg), inner).into(); - } - } - } - } - - /// Insert an error message that `what` was expected at the marker position. - pub fn expected_at(&self, p: &mut Parser, what: &str) { - p.children.insert( - self.0, - GreenData::new( - NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), - 0, - ) - .into(), - ); - } - - /// Return a reference to the child after the marker. - pub fn child_at<'a>(&self, p: &'a Parser) -> Option<&'a Green> { - p.children.get(self.0) - } - - pub fn perform(&self, p: &mut Parser, kind: NodeKind, f: F) -> T - where - F: FnOnce(&mut Parser) -> T, - { - let success = f(p); - self.end(p, kind); - success - } -} diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index 3fab98a4b..b330dbd6a 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -172,8 +172,8 @@ mod tests { test("typst\n it!", "typst", "\n it!"); test("typst\n it!", "typst", "\n it!"); test("abc`", "abc", "`"); - test(" hi", "", " hi"); - test("`", "", "`"); + test(" hi", "", " hi"); + test("`", "", "`"); } #[test] diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index aa28e1f50..494a9f0b9 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -57,12 +57,6 @@ impl<'s> Tokens<'s> { self.s.jump(index); } - /// The column of a given index in the source string. - #[inline] - pub fn column(&self, index: usize) -> usize { - self.s.column(index) - } - /// The underlying scanner. 
#[inline] pub fn scanner(&self) -> Scanner<'s> { @@ -314,7 +308,7 @@ impl<'s> Tokens<'s> { } fn raw(&mut self) -> NodeKind { - let column = self.column(self.s.index() - 1); + let column = self.s.column(self.s.index() - 1); let mut backticks = 1; while self.s.eat_if('`') && backticks < u8::MAX { @@ -342,10 +336,8 @@ impl<'s> Tokens<'s> { } } - let terminated = found == backticks; - let end = self.s.index() - if terminated { found as usize } else { 0 }; - - if terminated { + if found == backticks { + let end = self.s.index() - found as usize; NodeKind::Raw(Rc::new(resolve_raw( column, backticks, diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 363cbe6e9..022b51de0 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -42,11 +42,10 @@ impl Green { /// Set the type of the node. pub fn set_kind(&mut self, kind: NodeKind) { - let data = match self { - Self::Node(node) => &mut Rc::make_mut(node).data, - Self::Token(data) => data, - }; - data.set_kind(kind); + match self { + Self::Node(node) => Rc::make_mut(node).data.set_kind(kind), + Self::Token(data) => data.set_kind(kind), + } } /// The length of the node. @@ -56,7 +55,10 @@ impl Green { /// Whether the node or its children contain an error. pub fn erroneous(&self) -> bool { - self.data().erroneous() + match self { + Self::Node(node) => node.erroneous, + Self::Token(data) => data.kind.is_error(), + } } /// The node's children. @@ -94,26 +96,32 @@ pub struct GreenNode { data: GreenData, /// This node's children, losslessly make up this node. children: Vec, + /// Whether this node or any of its children are erroneous. + erroneous: bool, } impl GreenNode { - /// Creates a new node with the given kind and children. 
- pub fn with_children(kind: NodeKind, children: Vec) -> Self { - let mut data = GreenData::new(kind, 0); - let len = children - .iter() - .inspect(|c| data.erroneous |= c.erroneous()) - .map(Green::len) - .sum(); - data.len = len; - Self { data, children } - } - /// Creates a new node with the given kind and a single child. pub fn with_child(kind: NodeKind, child: impl Into) -> Self { Self::with_children(kind, vec![child.into()]) } + /// Creates a new node with the given kind and children. + pub fn with_children(kind: NodeKind, children: Vec) -> Self { + let mut erroneous = kind.is_error(); + let len = children + .iter() + .inspect(|c| erroneous |= c.erroneous()) + .map(Green::len) + .sum(); + + Self { + data: GreenData::new(kind, len), + children, + erroneous, + } + } + /// The node's children. pub fn children(&self) -> &[Green] { &self.children @@ -140,14 +148,12 @@ pub struct GreenData { kind: NodeKind, /// The byte length of the node in the source. len: usize, - /// Whether this node or any of its children contain an error. - erroneous: bool, } impl GreenData { /// Create new node metadata. pub fn new(kind: NodeKind, len: usize) -> Self { - Self { len, erroneous: kind.is_error(), kind } + Self { len, kind } } /// The type of the node. @@ -164,11 +170,6 @@ impl GreenData { pub fn len(&self) -> usize { self.len } - - /// Whether the node or its children contain an error. - pub fn erroneous(&self) -> bool { - self.erroneous - } } impl From for Green { @@ -219,7 +220,7 @@ impl<'a> RedRef<'a> { /// The error messages for this node and its descendants. 
pub fn errors(self) -> Vec { - if !self.green.erroneous() { + if !self.erroneous() { return vec![]; } @@ -235,7 +236,7 @@ impl<'a> RedRef<'a> { } _ => self .children() - .filter(|red| red.green.erroneous()) + .filter(|red| red.erroneous()) .flat_map(|red| red.errors()) .collect(), } @@ -256,11 +257,11 @@ impl<'a> RedRef<'a> { Green::Token(_) => &[], }; - let mut offset = self.offset; + let mut cursor = self.offset; children.iter().map(move |green| { - let child_offset = offset; - offset += green.len(); - RedRef { id: self.id, offset: child_offset, green } + let offset = cursor; + cursor += green.len(); + RedRef { id: self.id, offset, green } }) } @@ -623,29 +624,17 @@ pub enum ErrorPosition { impl NodeKind { /// Whether this is some kind of parenthesis. pub fn is_paren(&self) -> bool { - match self { - Self::LeftParen => true, - Self::RightParen => true, - _ => false, - } + matches!(self, Self::LeftParen | Self::RightParen) } /// Whether this is some kind of bracket. pub fn is_bracket(&self) -> bool { - match self { - Self::LeftBracket => true, - Self::RightBracket => true, - _ => false, - } + matches!(self, Self::LeftBracket | Self::RightBracket) } /// Whether this is some kind of brace. pub fn is_brace(&self) -> bool { - match self { - Self::LeftBrace => true, - Self::RightBrace => true, - _ => false, - } + matches!(self, Self::LeftBrace | Self::RightBrace) } /// Whether this is some kind of error. From 8117ca9950a2027efae133f811a26a4a7bf86a8e Mon Sep 17 00:00:00 2001 From: Laurenz Date: Sat, 6 Nov 2021 15:30:08 +0100 Subject: [PATCH 15/18] Deduplicate trivia search --- src/parse/parser.rs | 72 ++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 44 deletions(-) diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 5d26ff636..a30895ad5 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -16,7 +16,7 @@ pub struct Parser<'s> { eof: bool, /// The current token. 
current: Option, - /// The end byte index of the last (non-whitespace if in code mode) token. + /// The end byte index of the last non-trivia token. prev_end: usize, /// The start byte index of the peeked token. current_start: usize, @@ -59,25 +59,19 @@ impl<'s> Parser<'s> { { let prev = mem::take(&mut self.children); let output = f(self); + let until = self.trivia_start(); let mut children = mem::replace(&mut self.children, prev); - // Trailing trivia should not be wrapped into the new node. - let mut remains = vec![]; if self.tokens.mode() == TokenMode::Code { - let len = children.len(); - for n in (0 .. len).rev() { - if !self.is_trivia(&children[n].kind()) { - break; - } - - remains.push(children.pop().unwrap()); - } - remains.reverse(); + // Trailing trivia should not be wrapped into the new node. + let idx = self.children.len(); + self.children.push(Green::default()); + self.children.extend(children.drain(until ..)); + self.children[idx] = GreenNode::with_children(kind, children).into(); + } else { + self.children.push(GreenNode::with_children(kind, children).into()); } - self.children.push(GreenNode::with_children(kind, children).into()); - self.children.extend(remains); - output } @@ -86,7 +80,7 @@ impl<'s> Parser<'s> { self.eof } - /// Consume the current token and also trailing trivia if in code mode. + /// Consume the current token and also trailing trivia. pub fn eat(&mut self) { self.prev_end = self.tokens.index(); self.bump(); @@ -169,9 +163,7 @@ impl<'s> Parser<'s> { self.tokens.scanner().get(self.current_start() .. self.current_end()) } - /// The byte index at which the last token ended. - /// - /// Refers to the end of the last non-trivia token in code mode. + /// The byte index at which the last non-trivia token ended. pub fn prev_end(&self) -> usize { self.prev_end } @@ -248,14 +240,7 @@ impl<'s> Parser<'s> { // Rescan the peeked token if the mode changed. 
if rescan { if group_mode == TokenMode::Code { - let len = self.children.len(); - for n in (0 .. len).rev() { - if !self.is_trivia(self.children[n].kind()) { - break; - } - - self.children.pop(); - } + self.children.truncate(self.trivia_start()); } self.tokens.jump(self.prev_end()); @@ -307,6 +292,17 @@ impl<'s> Parser<'s> { } } + /// Find the index in the children list where trailing trivia starts. + fn trivia_start(&self) -> usize { + self.children.len() + - self + .children + .iter() + .rev() + .take_while(|node| self.is_trivia(node.kind())) + .count() + } + /// Whether the active group must end at a newline. fn stop_at_newline(&self) -> bool { matches!( @@ -356,15 +352,7 @@ impl Parser<'_> { /// Add an error that the `thing` was expected at the end of the last /// non-trivia token. pub fn expected_at(&mut self, thing: &str) { - let mut found = self.children.len(); - for (i, node) in self.children.iter().enumerate().rev() { - if !self.is_trivia(node.kind()) { - break; - } - found = i; - } - - Marker(found).expected_at(self, thing); + Marker(self.trivia_start()).expected_at(self, thing); } } @@ -384,15 +372,11 @@ impl Marker { success } - /// Wrap all children after the marker in a node with the given `kind`. + /// Wrap all children after the marker (excluding trailing trivia) in a node + /// with the given `kind`. pub fn end(self, p: &mut Parser, kind: NodeKind) { - let end = (self.0 .. p.children.len()) - .rev() - .find(|&i| !p.is_trivia(p.children[i].kind())) - .unwrap_or(self.0) - + 1; - - let children: Vec<_> = p.children.drain(self.0 .. end).collect(); + let until = p.trivia_start(); + let children = p.children.drain(self.0 .. 
until).collect(); p.children .insert(self.0, GreenNode::with_children(kind, children).into()); } From 95866d5fc9ae89a23c5754193c7de5d4fe4873b1 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Sun, 7 Nov 2021 22:05:48 +0100 Subject: [PATCH 16/18] Tidy up AST --- src/eval/capture.rs | 80 +--------- src/eval/mod.rs | 33 ++-- src/parse/mod.rs | 75 ++++------ src/parse/parser.rs | 37 +++-- src/parse/resolve.rs | 14 +- src/parse/scanner.rs | 8 +- src/parse/tokens.rs | 19 ++- src/syntax/ast.rs | 350 +++++++++++++------------------------------ src/syntax/mod.rs | 309 ++++++++++++++++++++------------------ src/syntax/pretty.rs | 6 +- 10 files changed, 370 insertions(+), 561 deletions(-) diff --git a/src/eval/capture.rs b/src/eval/capture.rs index e46103c8b..786da36e7 100644 --- a/src/eval/capture.rs +++ b/src/eval/capture.rs @@ -1,93 +1,27 @@ use std::rc::Rc; -use super::{Scope, Scopes, Value}; -use crate::syntax::ast::{ClosureParam, Expr, Imports}; -use crate::syntax::RedRef; +use super::{Scope, Scopes}; +use crate::syntax::{NodeKind, RedRef}; /// A visitor that captures variable slots. pub struct CapturesVisitor<'a> { external: &'a Scopes<'a>, - internal: Scopes<'a>, captures: Scope, } impl<'a> CapturesVisitor<'a> { /// Create a new visitor for the given external scopes. 
pub fn new(external: &'a Scopes) -> Self { - Self { - external, - internal: Scopes::new(None), - captures: Scope::new(), - } + Self { external, captures: Scope::new() } } pub fn visit(&mut self, node: RedRef) { - let expr: Option = node.cast(); - - match expr.as_ref() { - Some(Expr::Let(expr)) => { - self.visit(expr.init_ref()); - let ident = expr.binding(); - self.internal.def_mut(ident.as_str(), Value::None); - } - Some(Expr::Closure(closure)) => { - for arg in closure.params() { - match arg { - ClosureParam::Pos(ident) | ClosureParam::Sink(ident) => { - self.internal.def_mut(ident.as_str(), Value::None); - } - ClosureParam::Named(name) => { - self.internal.def_mut(name.name().as_str(), Value::None); - } - } - } - self.visit(closure.body_ref()); - } - Some(Expr::For(forloop)) => { - let pattern = forloop.pattern(); - self.internal.def_mut(pattern.value().as_str(), Value::None); - - if let Some(key) = pattern.key() { - self.internal.def_mut(key.as_str(), Value::None); - } - self.visit(forloop.body_ref()); - } - Some(Expr::Import(import)) => { - if let Imports::Idents(idents) = import.imports() { - for ident in idents { - self.internal.def_mut(ident.as_str(), Value::None); - } + match node.kind() { + NodeKind::Ident(ident) => { + if let Some(slot) = self.external.get(ident.as_str()) { + self.captures.def_slot(ident.as_str(), Rc::clone(slot)); } } - Some(Expr::Ident(ident)) => { - if self.internal.get(ident.as_str()).is_none() { - if let Some(slot) = self.external.get(ident.as_str()) { - self.captures.def_slot(ident.as_str(), Rc::clone(slot)); - } - } - } - _ => {} - } - - match expr.as_ref() { - Some(Expr::Let(_)) | Some(Expr::For(_)) | Some(Expr::Closure(_)) => {} - - Some(Expr::Block(_)) => { - self.internal.enter(); - for child in node.children() { - self.visit(child); - } - self.internal.exit(); - } - - Some(Expr::Template(_)) => { - self.internal.enter(); - for child in node.children() { - self.visit(child); - } - self.internal.exit(); - } - _ => { for child 
in node.children() { self.visit(child); diff --git a/src/eval/mod.rs b/src/eval/mod.rs index 809209f46..7c984691a 100644 --- a/src/eval/mod.rs +++ b/src/eval/mod.rs @@ -219,7 +219,7 @@ impl Eval for Ident { type Output = Value; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - match ctx.scopes.get(self) { + match ctx.scopes.get(&self.string) { Some(slot) => Ok(slot.borrow().clone()), None => bail!(self.span, "unknown variable"), } @@ -401,7 +401,7 @@ impl Eval for CallArgs { CallArg::Named(x) => { items.push(Arg { span, - name: Some((&x.name().string).into()), + name: Some(x.name().string.into()), value: Spanned::new(x.expr().eval(ctx)?, x.expr().span()), }); } @@ -443,12 +443,10 @@ impl Eval for ClosureExpr { type Output = Value; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - let name = self.name().as_ref().map(|name| name.string.clone()); - // Collect captured variables. let captured = { let mut visitor = CapturesVisitor::new(&ctx.scopes); - visitor.visit(self.underlying()); + visitor.visit(self.as_red()); visitor.finish() }; @@ -459,23 +457,24 @@ impl Eval for ClosureExpr { for param in self.params() { match param { ClosureParam::Pos(name) => { - params.push((name.string.clone(), None)); + params.push((name.string, None)); } - ClosureParam::Named(x) => { - params.push((x.name().string.clone(), Some(x.expr().eval(ctx)?))); + ClosureParam::Named(named) => { + params.push((named.name().string, Some(named.expr().eval(ctx)?))); } ClosureParam::Sink(name) => { if sink.is_some() { bail!(name.span, "only one argument sink is allowed"); } - sink = Some(name.string.clone()); + sink = Some(name.string); } } } // Clone the body expression so that we don't have a lifetime // dependence on the AST. - let body = self.body().clone(); + let name = self.name().map(|name| name.string); + let body = self.body(); // Define the actual function. 
let func = Function::new(name, move |ctx, args| { @@ -534,7 +533,7 @@ impl Eval for LetExpr { Some(expr) => expr.eval(ctx)?, None => Value::None, }; - ctx.scopes.def_mut(self.binding().as_str(), value); + ctx.scopes.def_mut(self.binding().string, value); Ok(Value::None) } } @@ -590,7 +589,7 @@ impl Eval for ForExpr { #[allow(unused_parens)] for ($($value),*) in $iter { - $(ctx.scopes.def_mut($binding.as_str(), $value);)* + $(ctx.scopes.def_mut(&$binding.string, $value);)* let value = self.body().eval(ctx)?; output = ops::join(output, value) @@ -637,16 +636,16 @@ impl Eval for ImportExpr { let file = ctx.import(&path, self.path().span())?; let module = &ctx.modules[&file]; - match &self.imports() { + match self.imports() { Imports::Wildcard => { for (var, slot) in module.scope.iter() { ctx.scopes.def_mut(var, slot.borrow().clone()); } } - Imports::Idents(idents) => { + Imports::Items(idents) => { for ident in idents { - if let Some(slot) = module.scope.get(&ident) { - ctx.scopes.def_mut(ident.as_str(), slot.borrow().clone()); + if let Some(slot) = module.scope.get(&ident.string) { + ctx.scopes.def_mut(ident.string, slot.borrow().clone()); } else { bail!(ident.span, "unresolved import"); } @@ -692,7 +691,7 @@ impl Access for Expr { impl Access for Ident { fn access<'a>(&self, ctx: &'a mut EvalContext) -> TypResult> { - match ctx.scopes.get(self) { + match ctx.scopes.get(&self.string) { Some(slot) => match slot.try_borrow_mut() { Ok(guard) => Ok(guard), Err(_) => bail!(self.span, "cannot mutate a constant"), diff --git a/src/parse/mod.rs b/src/parse/mod.rs index aa616fdf8..505482cad 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -13,7 +13,7 @@ pub use tokens::*; use std::rc::Rc; use crate::syntax::ast::{Associativity, BinOp, UnOp}; -use crate::syntax::{ErrorPosition, Green, GreenNode, NodeKind}; +use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind}; /// Parse a source file. 
pub fn parse(source: &str) -> Rc { @@ -197,7 +197,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { p.peek_direct(), Some(NodeKind::LeftParen | NodeKind::LeftBracket) ) { - call(p, &marker)?; + call(p, marker)?; continue; } @@ -206,7 +206,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { } if p.at(&NodeKind::With) { - with_expr(p, &marker)?; + with_expr(p, marker)?; } let op = match p.peek().and_then(BinOp::from_token) { @@ -248,7 +248,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { // Arrow means this is a closure's lone parameter. if !atomic && p.at(&NodeKind::Arrow) { marker.end(p, NodeKind::ClosureParams); - p.eat(); + p.eat_assert(&NodeKind::Arrow); marker.perform(p, NodeKind::Closure, expr) } else { Ok(()) @@ -326,14 +326,13 @@ fn parenthesized(p: &mut Parser) -> ParseResult { // Leading colon makes this a (empty) dictionary. if colon { - dict(p, &marker); + dict(p, marker); return Ok(()); } // Arrow means this is a closure's parameter list. if p.at(&NodeKind::Arrow) { - params(p, &marker, true); - marker.end(p, NodeKind::ClosureParams); + params(p, marker); p.eat_assert(&NodeKind::Arrow); return marker.perform(p, NodeKind::Closure, expr); } @@ -341,8 +340,8 @@ fn parenthesized(p: &mut Parser) -> ParseResult { // Transform into the identified collection. 
match kind { CollectionKind::Group => marker.end(p, NodeKind::Group), - CollectionKind::Positional => array(p, &marker), - CollectionKind::Named => dict(p, &marker), + CollectionKind::Positional => array(p, marker), + CollectionKind::Named => dict(p, marker), } Ok(()) @@ -384,7 +383,7 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { items += 1; if let Some(marker) = missing_coma.take() { - marker.expected_at(p, "comma"); + marker.expected(p, "comma"); } if p.eof() { @@ -419,12 +418,11 @@ fn item(p: &mut Parser) -> ParseResult { if p.at(&NodeKind::Colon) { marker.perform(p, NodeKind::Named, |p| { - if matches!(marker.child_at(p).unwrap().kind(), &NodeKind::Ident(_)) { + if matches!(marker.peek(p).unwrap().kind(), &NodeKind::Ident(_)) { p.eat(); expr(p) } else { - let error = - NodeKind::Error(ErrorPosition::Full, "expected identifier".into()); + let error = NodeKind::Error(ErrorPos::Full, "expected identifier".into()); marker.end(p, error); p.eat(); expr(p).ok(); @@ -440,15 +438,10 @@ fn item(p: &mut Parser) -> ParseResult { /// Convert a collection into an array, producing errors for anything other than /// expressions. -fn array(p: &mut Parser, marker: &Marker) { +fn array(p: &mut Parser, marker: Marker) { marker.filter_children(p, |x| match x.kind() { - NodeKind::Named => Err(( - ErrorPosition::Full, - "expected expression, found named pair".into(), - )), - NodeKind::Spread => { - Err((ErrorPosition::Full, "spreading is not allowed here".into())) - } + NodeKind::Named => Err("expected expression, found named pair"), + NodeKind::Spread => Err("spreading is not allowed here"), _ => Ok(()), }); marker.end(p, NodeKind::Array); @@ -456,25 +449,21 @@ fn array(p: &mut Parser, marker: &Marker) { /// Convert a collection into a dictionary, producing errors for anything other /// than named pairs. 
-fn dict(p: &mut Parser, marker: &Marker) { +fn dict(p: &mut Parser, marker: Marker) { marker.filter_children(p, |x| match x.kind() { + kind if kind.is_paren() => Ok(()), NodeKind::Named | NodeKind::Comma | NodeKind::Colon => Ok(()), - NodeKind::Spread => { - Err((ErrorPosition::Full, "spreading is not allowed here".into())) - } - _ if x.kind().is_paren() => Ok(()), - _ => Err(( - ErrorPosition::Full, - "expected named pair, found expression".into(), - )), + NodeKind::Spread => Err("spreading is not allowed here"), + _ => Err("expected named pair, found expression"), }); marker.end(p, NodeKind::Dict); } /// Convert a collection into a list of parameters, producing errors for /// anything other than identifiers, spread operations and named pairs. -fn params(p: &mut Parser, marker: &Marker, allow_parens: bool) { +fn params(p: &mut Parser, marker: Marker) { marker.filter_children(p, |x| match x.kind() { + kind if kind.is_paren() => Ok(()), NodeKind::Named | NodeKind::Comma | NodeKind::Ident(_) => Ok(()), NodeKind::Spread if matches!( @@ -484,9 +473,9 @@ fn params(p: &mut Parser, marker: &Marker, allow_parens: bool) { { Ok(()) } - _ if allow_parens && x.kind().is_paren() => Ok(()), - _ => Err((ErrorPosition::Full, "expected identifier".into())), + _ => Err("expected identifier"), }); + marker.end(p, NodeKind::ClosureParams); } // Parse a template block: `[...]`. @@ -517,7 +506,7 @@ fn block(p: &mut Parser) { } /// Parse a function call. -fn call(p: &mut Parser, callee: &Marker) -> ParseResult { +fn call(p: &mut Parser, callee: Marker) -> ParseResult { callee.perform(p, NodeKind::Call, |p| match p.peek_direct() { Some(NodeKind::LeftParen | NodeKind::LeftBracket) => { args(p, true); @@ -546,7 +535,7 @@ fn args(p: &mut Parser, allow_template: bool) { } /// Parse a with expression. 
-fn with_expr(p: &mut Parser, marker: &Marker) -> ParseResult { +fn with_expr(p: &mut Parser, marker: Marker) -> ParseResult { marker.perform(p, NodeKind::WithExpr, |p| { p.eat_assert(&NodeKind::With); @@ -569,18 +558,16 @@ fn let_expr(p: &mut Parser) -> ParseResult { ident(p)?; if p.at(&NodeKind::With) { - with_expr(p, &marker)?; + with_expr(p, marker)?; } else { // If a parenthesis follows, this is a function definition. let has_params = p.peek_direct() == Some(&NodeKind::LeftParen); if has_params { - p.perform(NodeKind::ClosureParams, |p| { - p.start_group(Group::Paren); - let marker = p.marker(); - collection(p); - params(p, &marker, true); - p.end_group(); - }); + let marker = p.marker(); + p.start_group(Group::Paren); + collection(p); + p.end_group(); + params(p, marker); } if p.eat_if(&NodeKind::Eq) { @@ -671,7 +658,7 @@ fn import_expr(p: &mut Parser) -> ParseResult { marker.filter_children(p, |n| match n.kind() { NodeKind::Ident(_) | NodeKind::Comma => Ok(()), - _ => Err((ErrorPosition::Full, "expected identifier".into())), + _ => Err("expected identifier"), }); }); }; diff --git a/src/parse/parser.rs b/src/parse/parser.rs index a30895ad5..5ebc2c17e 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,7 +1,7 @@ use std::mem; use super::{TokenMode, Tokens}; -use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; +use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; /// Allows parser methods to use the try operator. Not exposed as the parser @@ -131,11 +131,9 @@ impl<'s> Parser<'s> { /// Eat the current token, but change its type. pub fn convert(&mut self, kind: NodeKind) { - let idx = self.children.len(); + let marker = self.marker(); self.eat(); - if let Some(child) = self.children.get_mut(idx) { - child.set_kind(kind); - } + marker.convert(self, kind); } /// Whether the current token is of the given type. 
@@ -321,7 +319,7 @@ impl<'s> Parser<'s> { impl Parser<'_> { /// Push an error into the children list. pub fn push_error(&mut self, msg: impl Into) { - let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + let error = NodeKind::Error(ErrorPos::Full, msg.into()); self.children.push(GreenData::new(error, 0).into()); } @@ -330,7 +328,7 @@ impl Parser<'_> { match self.peek() { Some(found) => { let msg = format!("unexpected {}", found); - let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + let error = NodeKind::Error(ErrorPos::Full, msg.into()); self.perform(error, Self::eat); } None => self.push_error("unexpected end of file"), @@ -342,7 +340,7 @@ impl Parser<'_> { match self.peek() { Some(found) => { let msg = format!("expected {}, found {}", thing, found); - let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + let error = NodeKind::Error(ErrorPos::Full, msg.into()); self.perform(error, Self::eat); } None => self.expected_at(thing), @@ -352,7 +350,7 @@ impl Parser<'_> { /// Add an error that the `thing` was expected at the end of the last /// non-trivia token. pub fn expected_at(&mut self, thing: &str) { - Marker(self.trivia_start()).expected_at(self, thing); + Marker(self.trivia_start()).expected(self, thing); } } @@ -384,15 +382,15 @@ impl Marker { /// Wrap all children that do not fulfill the predicate in error nodes. pub fn filter_children(self, p: &mut Parser, f: F) where - F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, + F: Fn(&Green) -> Result<(), &'static str>, { for child in &mut p.children[self.0 ..] 
{ if (p.tokens.mode() == TokenMode::Markup || !Parser::is_trivia_ext(child.kind(), false)) && !child.kind().is_error() { - if let Err((pos, msg)) = f(child) { - let error = NodeKind::Error(pos, msg); + if let Err(msg) = f(child) { + let error = NodeKind::Error(ErrorPos::Full, msg.into()); let inner = mem::take(child); *child = GreenNode::with_child(error, inner).into(); } @@ -401,16 +399,23 @@ impl Marker { } /// Insert an error message that `what` was expected at the marker position. - pub fn expected_at(self, p: &mut Parser, what: &str) { + pub fn expected(self, p: &mut Parser, what: &str) { let msg = format!("expected {}", what); - let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + let error = NodeKind::Error(ErrorPos::Full, msg.into()); p.children.insert(self.0, GreenData::new(error, 0).into()); } - /// Return a reference to the child directly after the marker. - pub fn child_at<'a>(self, p: &'a Parser) -> Option<&'a Green> { + /// Peek at the child directly after the marker. + pub fn peek<'a>(self, p: &'a Parser) -> Option<&'a Green> { p.children.get(self.0) } + + /// Convert the child directly after marker. + pub fn convert(self, p: &mut Parser, kind: NodeKind) { + if let Some(child) = p.children.get_mut(self.0) { + child.convert(kind); + } + } } /// A logical group of tokens, e.g. `[...]`. 
diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index b330dbd6a..6719f41df 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -1,4 +1,4 @@ -use super::{is_newline, Scanner}; +use super::{is_ident, is_newline, Scanner}; use crate::syntax::RawData; use crate::util::EcoString; @@ -51,7 +51,7 @@ pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawData { let (tag, inner) = split_at_lang_tag(text); let (text, block) = trim_and_split_raw(column, inner); RawData { - lang: Some(tag.into()), + lang: is_ident(tag).then(|| tag.into()), text: text.into(), backticks, block, @@ -201,15 +201,15 @@ mod tests { // More than one backtick with lang tag. test(0, 2, "js alert()", Some("js"), "alert()", false); test(0, 3, "py quit(\n\n)", Some("py"), "quit(\n\n)", true); - test(0, 2, "♥", Some("♥"), "", false); + test(0, 2, "♥", None, "", false); // Trimming of whitespace (tested more thoroughly in separate test). - test(0, 2, " a", Some(""), "a", false); - test(0, 2, " a", Some(""), " a", false); - test(0, 2, " \na", Some(""), "a", true); + test(0, 2, " a", None, "a", false); + test(0, 2, " a", None, " a", false); + test(0, 2, " \na", None, "a", true); // Dedenting - test(2, 3, " def foo():\n bar()", Some(""), "def foo():\n bar()", true); + test(2, 3, " def foo():\n bar()", None, "def foo():\n bar()", true); } #[test] diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs index 92a2333d4..ea06a2e06 100644 --- a/src/parse/scanner.rs +++ b/src/parse/scanner.rs @@ -182,7 +182,13 @@ pub fn is_newline(character: char) -> bool { ) } -/// Whether a string is a valid identifier. +/// Whether a string is a valid unicode identifier. +/// +/// In addition to what is specified in the [Unicode Standard][uax31], we allow: +/// - `_` as a starting character, +/// - `_` and `-` as continuing characters. 
+/// +/// [uax31]: http://www.unicode.org/reports/tr31/ #[inline] pub fn is_ident(string: &str) -> bool { let mut chars = string.chars(); diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 494a9f0b9..1523cd643 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -236,20 +236,19 @@ impl<'s> Tokens<'s> { 'u' if self.s.rest().starts_with("u{") => { self.s.eat_assert('u'); self.s.eat_assert('{'); - let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into(); - + let sequence = self.s.eat_while(|c| c.is_ascii_alphanumeric()); if self.s.eat_if('}') { if let Some(c) = resolve_hex(&sequence) { NodeKind::UnicodeEscape(c) } else { NodeKind::Error( - ErrorPosition::Full, + ErrorPos::Full, "invalid unicode escape sequence".into(), ) } } else { NodeKind::Error( - ErrorPosition::End, + ErrorPos::End, "expected closing brace".into(), ) } @@ -348,7 +347,7 @@ impl<'s> Tokens<'s> { let noun = if remaining == 1 { "backtick" } else { "backticks" }; NodeKind::Error( - ErrorPosition::End, + ErrorPos::End, if found == 0 { format!("expected {} {}", remaining, noun) } else { @@ -396,7 +395,7 @@ impl<'s> Tokens<'s> { })) } else { NodeKind::Error( - ErrorPosition::End, + ErrorPos::End, if !display || (!escaped && dollar) { "expected closing dollar sign" } else { @@ -487,7 +486,7 @@ impl<'s> Tokens<'s> { if self.s.eat_if('"') { NodeKind::Str(string) } else { - NodeKind::Error(ErrorPosition::End, "expected quote".into()) + NodeKind::Error(ErrorPos::End, "expected quote".into()) } } @@ -555,7 +554,7 @@ mod tests { use super::*; - use ErrorPosition::*; + use ErrorPos::*; use NodeKind::*; use Option::None; use TokenMode::{Code, Markup}; @@ -564,7 +563,7 @@ mod tests { NodeKind::UnicodeEscape(c) } - fn Error(pos: ErrorPosition, message: &str) -> NodeKind { + fn Error(pos: ErrorPos, message: &str) -> NodeKind { NodeKind::Error(pos, message.into()) } @@ -881,7 +880,7 @@ mod tests { // Test more backticks. 
t!(Markup: "``nope``" => Raw("", None, 1, false), Text("nope"), Raw("", None, 1, false)); - t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, false)); + t!(Markup: "````🚀````" => Raw("", None, 4, false)); t!(Markup[""]: "`````👩‍🚀````noend" => Error(End, "expected 5 backticks")); t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, false), Raw("", None, 1, false)); } diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 1198d6b1e..dc71e2295 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -1,10 +1,7 @@ //! A typed layer over the red-green tree. -use std::ops::Deref; - use super::{NodeKind, RedNode, RedRef, Span}; use crate::geom::{AngularUnit, LengthUnit}; -use crate::parse::is_ident; use crate::util::EcoString; /// A typed AST node. @@ -40,7 +37,7 @@ macro_rules! node { } /// The underlying red node. - pub fn underlying(&self) -> RedRef { + pub fn as_red(&self) -> RedRef { self.0.as_ref() } } @@ -112,7 +109,7 @@ impl TypedNode for MarkupNode { #[derive(Debug, Clone, PartialEq)] pub struct RawNode { /// An optional identifier specifying the language to syntax-highlight in. - pub lang: Option, + pub lang: Option, /// The raw text, determined as the raw string between the backticks trimmed /// according to the above rules. pub text: EcoString, @@ -124,18 +121,11 @@ pub struct RawNode { impl TypedNode for RawNode { fn from_red(node: RedRef) -> Option { match node.kind() { - NodeKind::Raw(raw) => { - let full = node.span(); - let start = full.start + raw.backticks as usize; - Some(Self { - block: raw.block, - lang: raw.lang.as_ref().and_then(|lang| { - let span = Span::new(full.source, start, start + lang.len()); - Ident::new(lang, span) - }), - text: raw.text.clone(), - }) - } + NodeKind::Raw(raw) => Some(Self { + block: raw.block, + lang: raw.lang.clone(), + text: raw.text.clone(), + }), _ => None, } } @@ -149,9 +139,7 @@ node! { impl HeadingNode { /// The contents of the heading. 
pub fn body(&self) -> Markup { - self.0 - .cast_first_child() - .expect("heading node is missing markup body") + self.0.cast_first_child().expect("heading is missing markup body") } /// The section depth (numer of equals signs). @@ -184,7 +172,7 @@ node! { impl EnumNode { /// The contents of the list item. pub fn body(&self) -> Markup { - self.0.cast_first_child().expect("enumeration node is missing body") + self.0.cast_first_child().expect("enum node is missing body") } /// The number, if any. @@ -195,7 +183,7 @@ impl EnumNode { NodeKind::EnumNumbering(num) => Some(num.clone()), _ => None, }) - .expect("enumeration node is missing number") + .expect("enum node is missing number") } } @@ -240,6 +228,31 @@ pub enum Expr { Include(IncludeExpr), } +impl TypedNode for Expr { + fn from_red(node: RedRef) -> Option { + match node.kind() { + NodeKind::Ident(_) => node.cast().map(Self::Ident), + NodeKind::Array => node.cast().map(Self::Array), + NodeKind::Dict => node.cast().map(Self::Dict), + NodeKind::Template => node.cast().map(Self::Template), + NodeKind::Group => node.cast().map(Self::Group), + NodeKind::Block => node.cast().map(Self::Block), + NodeKind::Unary => node.cast().map(Self::Unary), + NodeKind::Binary => node.cast().map(Self::Binary), + NodeKind::Call => node.cast().map(Self::Call), + NodeKind::Closure => node.cast().map(Self::Closure), + NodeKind::WithExpr => node.cast().map(Self::With), + NodeKind::LetExpr => node.cast().map(Self::Let), + NodeKind::IfExpr => node.cast().map(Self::If), + NodeKind::WhileExpr => node.cast().map(Self::While), + NodeKind::ForExpr => node.cast().map(Self::For), + NodeKind::ImportExpr => node.cast().map(Self::Import), + NodeKind::IncludeExpr => node.cast().map(Self::Include), + _ => node.cast().map(Self::Lit), + } + } +} + impl Expr { /// Whether the expression can be shortened in markup with a hashtag. 
pub fn has_short_form(&self) -> bool { @@ -280,31 +293,6 @@ impl Expr { } } -impl TypedNode for Expr { - fn from_red(node: RedRef) -> Option { - match node.kind() { - NodeKind::Ident(_) => node.cast().map(Self::Ident), - NodeKind::Array => node.cast().map(Self::Array), - NodeKind::Dict => node.cast().map(Self::Dict), - NodeKind::Template => node.cast().map(Self::Template), - NodeKind::Group => node.cast().map(Self::Group), - NodeKind::Block => node.cast().map(Self::Block), - NodeKind::Unary => node.cast().map(Self::Unary), - NodeKind::Binary => node.cast().map(Self::Binary), - NodeKind::Call => node.cast().map(Self::Call), - NodeKind::Closure => node.cast().map(Self::Closure), - NodeKind::WithExpr => node.cast().map(Self::With), - NodeKind::LetExpr => node.cast().map(Self::Let), - NodeKind::IfExpr => node.cast().map(Self::If), - NodeKind::WhileExpr => node.cast().map(Self::While), - NodeKind::ForExpr => node.cast().map(Self::For), - NodeKind::ImportExpr => node.cast().map(Self::Import), - NodeKind::IncludeExpr => node.cast().map(Self::Include), - _ => node.cast().map(Self::Lit), - } - } -} - /// A literal: `1`, `true`, ... 
#[derive(Debug, Clone, PartialEq)] pub enum Lit { @@ -335,17 +323,17 @@ pub enum Lit { impl TypedNode for Lit { fn from_red(node: RedRef) -> Option { - match node.kind() { + match *node.kind() { NodeKind::None => Some(Self::None(node.span())), NodeKind::Auto => Some(Self::Auto(node.span())), - NodeKind::Bool(b) => Some(Self::Bool(node.span(), *b)), - NodeKind::Int(i) => Some(Self::Int(node.span(), *i)), - NodeKind::Float(f) => Some(Self::Float(node.span(), *f)), - NodeKind::Length(f, unit) => Some(Self::Length(node.span(), *f, *unit)), - NodeKind::Angle(f, unit) => Some(Self::Angle(node.span(), *f, *unit)), - NodeKind::Percentage(f) => Some(Self::Percent(node.span(), *f)), - NodeKind::Fraction(f) => Some(Self::Fractional(node.span(), *f)), - NodeKind::Str(s) => Some(Self::Str(node.span(), s.clone())), + NodeKind::Bool(v) => Some(Self::Bool(node.span(), v)), + NodeKind::Int(v) => Some(Self::Int(node.span(), v)), + NodeKind::Float(v) => Some(Self::Float(node.span(), v)), + NodeKind::Length(v, unit) => Some(Self::Length(node.span(), v, unit)), + NodeKind::Angle(v, unit) => Some(Self::Angle(node.span(), v, unit)), + NodeKind::Percentage(v) => Some(Self::Percent(node.span(), v)), + NodeKind::Fraction(v) => Some(Self::Fractional(node.span(), v)), + NodeKind::Str(ref v) => Some(Self::Str(node.span(), v.clone())), _ => None, } } @@ -354,17 +342,17 @@ impl TypedNode for Lit { impl Lit { /// The source code location. 
pub fn span(&self) -> Span { - match self { - Self::None(span) => *span, - Self::Auto(span) => *span, - Self::Bool(span, _) => *span, - Self::Int(span, _) => *span, - Self::Float(span, _) => *span, - Self::Length(span, _, _) => *span, - Self::Angle(span, _, _) => *span, - Self::Percent(span, _) => *span, - Self::Fractional(span, _) => *span, - Self::Str(span, _) => *span, + match *self { + Self::None(span) => span, + Self::Auto(span) => span, + Self::Bool(span, _) => span, + Self::Int(span, _) => span, + Self::Float(span, _) => span, + Self::Length(span, _, _) => span, + Self::Angle(span, _, _) => span, + Self::Percent(span, _) => span, + Self::Fractional(span, _) => span, + Self::Str(span, _) => span, } } } @@ -401,16 +389,12 @@ node! { impl Named { /// The name: `pattern`. pub fn name(&self) -> Ident { - self.0.cast_first_child().expect("named pair is missing name ident") + self.0.cast_first_child().expect("named pair is missing name") } /// The right-hand side of the pair: `dashed`. pub fn expr(&self) -> Expr { - self.0 - .children() - .filter_map(RedRef::cast) - .nth(1) - .expect("named pair is missing expression") + self.0.cast_last_child().expect("named pair is missing expression") } } @@ -422,9 +406,7 @@ node! { impl TemplateExpr { /// The contents of the template. pub fn body(&self) -> Markup { - self.0 - .cast_first_child() - .expect("template expression is missing body") + self.0.cast_first_child().expect("template is missing body") } } @@ -436,9 +418,7 @@ node! { impl GroupExpr { /// The wrapped expression. pub fn expr(&self) -> Expr { - self.0 - .cast_first_child() - .expect("group expression is missing expression") + self.0.cast_first_child().expect("group is missing expression") } } @@ -469,9 +449,7 @@ impl UnaryExpr { /// The expression to operator on: `x`. 
pub fn expr(&self) -> Expr { - self.0 - .cast_first_child() - .expect("unary expression is missing expression") + self.0.cast_last_child().expect("unary expression is missing child") } } @@ -506,7 +484,7 @@ impl UnOp { /// The precedence of this operator. pub fn precedence(self) -> usize { match self { - Self::Pos | Self::Neg => 8, + Self::Pos | Self::Neg => 7, Self::Not => 4, } } @@ -544,9 +522,7 @@ impl BinaryExpr { /// The right-hand side of the operation: `b`. pub fn rhs(&self) -> Expr { self.0 - .children() - .filter_map(RedRef::cast) - .nth(1) + .cast_last_child() .expect("binary expression is missing right-hand side") } } @@ -701,14 +677,12 @@ node! { impl CallExpr { /// The function to call. pub fn callee(&self) -> Expr { - self.0.cast_first_child().expect("call expression is missing callee") + self.0.cast_first_child().expect("call is missing callee") } /// The arguments to the function. pub fn args(&self) -> CallArgs { - self.0 - .cast_first_child() - .expect("call expression is missing argument list") + self.0.cast_last_child().expect("call is missing argument list") } } @@ -738,14 +712,9 @@ pub enum CallArg { impl TypedNode for CallArg { fn from_red(node: RedRef) -> Option { match node.kind() { - NodeKind::Named => Some(CallArg::Named( - node.cast().expect("named call argument is missing name"), - )), - NodeKind::Spread => Some(CallArg::Spread( - node.cast_first_child() - .expect("call argument sink is missing expression"), - )), - _ => Some(CallArg::Pos(node.cast()?)), + NodeKind::Named => node.cast().map(CallArg::Named), + NodeKind::Spread => node.cast_first_child().map(CallArg::Spread), + _ => node.cast().map(CallArg::Pos), } } } @@ -754,8 +723,8 @@ impl CallArg { /// The name of this argument. 
pub fn span(&self) -> Span { match self { - Self::Named(named) => named.span(), Self::Pos(expr) => expr.span(), + Self::Named(named) => named.span(), Self::Spread(expr) => expr.span(), } } @@ -771,8 +740,6 @@ impl ClosureExpr { /// /// This only exists if you use the function syntax sugar: `let f(x) = y`. pub fn name(&self) -> Option { - // `first_convert_child` does not work here because of the Option in the - // Result. self.0.cast_first_child() } @@ -788,22 +755,11 @@ impl ClosureExpr { /// The body of the closure. pub fn body(&self) -> Expr { - // The filtering for the NodeKind is necessary here because otherwise, - // `first_convert_child` will use the Ident if present. self.0.cast_last_child().expect("closure is missing body") } - - /// The red node reference of the body of the closure. - pub fn body_ref(&self) -> RedRef { - self.0 - .children() - .filter(|x| x.cast::().is_some()) - .last() - .unwrap() - } } -/// An parameter to a closure. +/// A parameter to a closure. #[derive(Debug, Clone, PartialEq)] pub enum ClosureParam { /// A positional parameter: `x`. @@ -817,17 +773,10 @@ pub enum ClosureParam { impl TypedNode for ClosureParam { fn from_red(node: RedRef) -> Option { match node.kind() { - NodeKind::Ident(id) => { - Some(ClosureParam::Pos(Ident::new_unchecked(id, node.span()))) - } - NodeKind::Named => Some(ClosureParam::Named( - node.cast().expect("named closure parameter is missing name"), - )), - NodeKind::Spread => Some(ClosureParam::Sink( - node.cast_first_child() - .expect("closure parameter sink is missing identifier"), - )), - _ => Some(ClosureParam::Pos(node.cast()?)), + NodeKind::Ident(_) => node.cast().map(ClosureParam::Pos), + NodeKind::Named => node.cast().map(ClosureParam::Named), + NodeKind::Spread => node.cast_first_child().map(ClosureParam::Sink), + _ => None, } } } @@ -840,9 +789,7 @@ node! { impl WithExpr { /// The function to apply the arguments to. 
pub fn callee(&self) -> Expr { - self.0 - .cast_first_child() - .expect("with expression is missing callee expression") + self.0.cast_first_child().expect("with expression is missing callee") } /// The arguments to apply to the function. @@ -861,17 +808,16 @@ node! { impl LetExpr { /// The binding to assign to. pub fn binding(&self) -> Ident { - if let Some(c) = self.0.cast_first_child() { - c - } else if let Some(w) = self.0.typed_child(&NodeKind::WithExpr) { - // Can't do an `first_convert_child` here because the WithExpr's - // callee has to be an identifier. - w.cast_first_child() - .expect("with expression is missing an identifier callee") - } else if let Some(Expr::Closure(c)) = self.0.cast_last_child() { - c.name().expect("closure is missing an identifier name") - } else { - panic!("let expression is missing either an identifier or a with expression") + match self.0.cast_first_child() { + Some(Expr::Ident(binding)) => binding, + Some(Expr::With(with)) => match with.callee() { + Expr::Ident(binding) => binding, + _ => panic!("let .. with callee must be identifier"), + }, + Some(Expr::Closure(closure)) => { + closure.name().expect("let-bound closure is missing name") + } + _ => panic!("let expression is missing binding"), } } @@ -880,24 +826,10 @@ impl LetExpr { if self.0.cast_first_child::().is_some() { self.0.children().filter_map(RedRef::cast).nth(1) } else { - Some( - self.0 - .cast_first_child() - .expect("let expression is missing a with expression"), - ) + // This is a let .. with expression. + self.0.cast_first_child() } } - - /// The red node reference for the expression the binding is initialized - /// with. - pub fn init_ref(&self) -> RedRef { - if self.0.cast_first_child::().is_some() { - self.0.children().filter(|x| x.cast::().is_some()).nth(1) - } else { - self.0.children().find(|x| x.cast::().is_some()) - } - .unwrap() - } } node! { @@ -908,16 +840,12 @@ node! { impl ImportExpr { /// The items to be imported. 
pub fn imports(&self) -> Imports { - self.0 - .cast_first_child() - .expect("import expression is missing import list") + self.0.cast_first_child().expect("import is missing items") } /// The location of the importable file. pub fn path(&self) -> Expr { - self.0 - .cast_first_child() - .expect("import expression is missing path expression") + self.0.cast_last_child().expect("import is missing path") } } @@ -926,8 +854,8 @@ impl ImportExpr { pub enum Imports { /// All items in the scope of the file should be imported. Wildcard, - /// The specified identifiers from the file should be imported. - Idents(Vec), + /// The specified items from the file should be imported. + Items(Vec), } impl TypedNode for Imports { @@ -935,8 +863,8 @@ impl TypedNode for Imports { match node.kind() { NodeKind::Star => Some(Imports::Wildcard), NodeKind::ImportItems => { - let idents = node.children().filter_map(RedRef::cast).collect(); - Some(Imports::Idents(idents)) + let items = node.children().filter_map(RedRef::cast).collect(); + Some(Imports::Items(items)) } _ => None, } @@ -951,9 +879,7 @@ node! { impl IncludeExpr { /// The location of the file to be included. pub fn path(&self) -> Expr { - self.0 - .cast_first_child() - .expect("include expression is missing path expression") + self.0.cast_last_child().expect("include is missing path") } } @@ -965,9 +891,7 @@ node! { impl IfExpr { /// The condition which selects the body to evaluate. pub fn condition(&self) -> Expr { - self.0 - .cast_first_child() - .expect("if expression is missing condition expression") + self.0.cast_first_child().expect("if expression is missing condition") } /// The expression to evaluate if the condition is true. @@ -976,7 +900,7 @@ impl IfExpr { .children() .filter_map(RedRef::cast) .nth(1) - .expect("if expression is missing if body") + .expect("if expression is missing body") } /// The expression to evaluate if the condition is false. @@ -993,18 +917,12 @@ node! 
{ impl WhileExpr { /// The condition which selects whether to evaluate the body. pub fn condition(&self) -> Expr { - self.0 - .cast_first_child() - .expect("while loop expression is missing condition expression") + self.0.cast_first_child().expect("while loop is missing condition") } /// The expression to evaluate while the condition is true. pub fn body(&self) -> Expr { - self.0 - .children() - .filter_map(RedRef::cast) - .nth(1) - .expect("while loop expression is missing body") + self.0.cast_last_child().expect("while loop is missing body") } } @@ -1016,34 +934,17 @@ node! { impl ForExpr { /// The pattern to assign to. pub fn pattern(&self) -> ForPattern { - self.0 - .cast_first_child() - .expect("for loop expression is missing pattern") + self.0.cast_first_child().expect("for loop is missing pattern") } /// The expression to iterate over. pub fn iter(&self) -> Expr { - self.0 - .cast_first_child() - .expect("for loop expression is missing iterable expression") + self.0.cast_first_child().expect("for loop is missing iterable") } /// The expression to evaluate for each iteration. pub fn body(&self) -> Expr { - self.0 - .children() - .filter_map(RedRef::cast) - .last() - .expect("for loop expression is missing body") - } - - /// The red node reference for the expression to evaluate for each iteration. - pub fn body_ref(&self) -> RedRef { - self.0 - .children() - .filter(|x| x.cast::().is_some()) - .last() - .unwrap() + self.0.cast_last_child().expect("for loop is missing body") } } @@ -1062,19 +963,11 @@ impl ForPattern { /// The value part of the pattern. pub fn value(&self) -> Ident { - self.0 - .cast_last_child() - .expect("for-in loop pattern is missing value") + self.0.cast_last_child().expect("for loop pattern is missing value") } } -/// An unicode identifier with a few extra permissible characters. 
-/// -/// In addition to what is specified in the [Unicode Standard][uax31], we allow: -/// - `_` as a starting character, -/// - `_` and `-` as continuing characters. -/// -/// [uax31]: http://www.unicode.org/reports/tr31/ +/// An identifier. #[derive(Debug, Clone, PartialEq)] pub struct Ident { /// The source code location. @@ -1083,44 +976,13 @@ pub struct Ident { pub string: EcoString, } -impl Ident { - /// Create a new identifier from a string checking that it is a valid. - pub fn new( - string: impl AsRef + Into, - span: impl Into, - ) -> Option { - is_ident(string.as_ref()) - .then(|| Self { span: span.into(), string: string.into() }) - } - - /// Create a new identifier from a string and a span. - /// - /// The `string` must be a valid identifier. - #[track_caller] - pub fn new_unchecked(string: impl Into, span: Span) -> Self { - let string = string.into(); - debug_assert!(is_ident(&string), "`{}` is not a valid identifier", string); - Self { span, string } - } - - /// Return a reference to the underlying string. - pub fn as_str(&self) -> &str { - &self.string - } -} - -impl Deref for Ident { - type Target = str; - - fn deref(&self) -> &Self::Target { - self.as_str() - } -} - impl TypedNode for Ident { fn from_red(node: RedRef) -> Option { match node.kind() { - NodeKind::Ident(string) => Some(Ident::new_unchecked(string, node.span())), + NodeKind::Ident(string) => Some(Ident { + span: node.span(), + string: string.clone(), + }), _ => None, } } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 022b51de0..fc05ad50c 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -5,7 +5,6 @@ mod pretty; mod span; use std::fmt::{self, Debug, Display, Formatter}; -use std::mem; use std::rc::Rc; pub use pretty::*; @@ -40,14 +39,6 @@ impl Green { self.data().kind() } - /// Set the type of the node. 
- pub fn set_kind(&mut self, kind: NodeKind) { - match self { - Self::Node(node) => Rc::make_mut(node).data.set_kind(kind), - Self::Token(data) => data.set_kind(kind), - } - } - /// The length of the node. pub fn len(&self) -> usize { self.data().len() @@ -68,6 +59,18 @@ impl Green { Green::Token(_) => &[], } } + + /// Change the type of the node. + pub fn convert(&mut self, kind: NodeKind) { + match self { + Self::Node(node) => { + let node = Rc::make_mut(node); + node.erroneous |= kind.is_error(); + node.data.kind = kind; + } + Self::Token(data) => data.kind = kind, + } + } } impl Default for Green { @@ -161,11 +164,6 @@ impl GreenData { &self.kind } - /// Set the type of the node. - pub fn set_kind(&mut self, kind: NodeKind) { - self.kind = kind; - } - /// The length of the node. pub fn len(&self) -> usize { self.len @@ -178,123 +176,7 @@ impl From for Green { } } -/// A borrowed wrapper for a [`GreenNode`] with span information. -/// -/// Borrowed variant of [`RedNode`]. Can be [cast](Self::cast) to an AST node. -#[derive(Copy, Clone, PartialEq)] -pub struct RedRef<'a> { - id: SourceId, - offset: usize, - green: &'a Green, -} - -impl<'a> RedRef<'a> { - /// Convert to an owned representation. - pub fn own(self) -> RedNode { - RedNode { - id: self.id, - offset: self.offset, - green: self.green.clone(), - } - } - - /// The type of the node. - pub fn kind(self) -> &'a NodeKind { - self.green.kind() - } - - /// The length of the node. - pub fn len(self) -> usize { - self.green.len() - } - - /// The span of the node. - pub fn span(self) -> Span { - Span::new(self.id, self.offset, self.offset + self.green.len()) - } - - /// Whether the node or its children contain an error. - pub fn erroneous(self) -> bool { - self.green.erroneous() - } - - /// The error messages for this node and its descendants. 
- pub fn errors(self) -> Vec { - if !self.erroneous() { - return vec![]; - } - - match self.kind() { - NodeKind::Error(pos, msg) => { - let span = match pos { - ErrorPosition::Start => self.span().at_start(), - ErrorPosition::Full => self.span(), - ErrorPosition::End => self.span().at_end(), - }; - - vec![Error::new(span, msg.to_string())] - } - _ => self - .children() - .filter(|red| red.erroneous()) - .flat_map(|red| red.errors()) - .collect(), - } - } - - /// Convert the node to a typed AST node. - pub fn cast(self) -> Option - where - T: TypedNode, - { - T::from_red(self) - } - - /// The node's children. - pub fn children(self) -> impl Iterator> { - let children = match &self.green { - Green::Node(node) => node.children(), - Green::Token(_) => &[], - }; - - let mut cursor = self.offset; - children.iter().map(move |green| { - let offset = cursor; - cursor += green.len(); - RedRef { id: self.id, offset, green } - }) - } - - /// Get the first child of some type. - pub(crate) fn typed_child(self, kind: &NodeKind) -> Option> { - self.children() - .find(|x| mem::discriminant(x.kind()) == mem::discriminant(kind)) - } - - /// Get the first child that can cast to some AST type. - pub(crate) fn cast_first_child(self) -> Option { - self.children().find_map(RedRef::cast) - } - - /// Get the last child that can cast to some AST type. - pub(crate) fn cast_last_child(self) -> Option { - self.children().filter_map(RedRef::cast).last() - } -} - -impl Debug for RedRef<'_> { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{:?}: {:?}", self.kind(), self.span())?; - let mut children = self.children().peekable(); - if children.peek().is_some() { - f.write_str(" ")?; - f.debug_list().entries(children.map(RedRef::own)).finish()?; - } - Ok(()) - } -} - -/// A owned wrapper for a [`GreenNode`] with span information. +/// A owned wrapper for a green node with span information. /// /// Owned variant of [`RedRef`]. Can be [cast](Self::cast) to an AST nodes. 
#[derive(Clone, PartialEq)] @@ -348,22 +230,17 @@ impl RedNode { } /// The children of the node. - pub fn children(&self) -> impl Iterator> { + pub fn children(&self) -> Children<'_> { self.as_ref().children() } - /// Get the first child of some type. - pub(crate) fn typed_child(&self, kind: &NodeKind) -> Option { - self.as_ref().typed_child(kind).map(RedRef::own) - } - /// Get the first child that can cast to some AST type. - pub(crate) fn cast_first_child(&self) -> Option { + pub fn cast_first_child(&self) -> Option { self.as_ref().cast_first_child() } /// Get the last child that can cast to some AST type. - pub(crate) fn cast_last_child(&self) -> Option { + pub fn cast_last_child(&self) -> Option { self.as_ref().cast_last_child() } } @@ -374,6 +251,146 @@ impl Debug for RedNode { } } +/// A borrowed wrapper for a green node with span information. +/// +/// Borrowed variant of [`RedNode`]. Can be [cast](Self::cast) to an AST node. +#[derive(Copy, Clone, PartialEq)] +pub struct RedRef<'a> { + id: SourceId, + offset: usize, + green: &'a Green, +} + +impl<'a> RedRef<'a> { + /// Convert to an owned representation. + pub fn own(self) -> RedNode { + RedNode { + id: self.id, + offset: self.offset, + green: self.green.clone(), + } + } + + /// The type of the node. + pub fn kind(self) -> &'a NodeKind { + self.green.kind() + } + + /// The length of the node. + pub fn len(self) -> usize { + self.green.len() + } + + /// The span of the node. + pub fn span(self) -> Span { + Span::new(self.id, self.offset, self.offset + self.green.len()) + } + + /// The error messages for this node and its descendants. 
+ pub fn errors(self) -> Vec { + if !self.green.erroneous() { + return vec![]; + } + + match self.kind() { + NodeKind::Error(pos, msg) => { + let span = match pos { + ErrorPos::Start => self.span().at_start(), + ErrorPos::Full => self.span(), + ErrorPos::End => self.span().at_end(), + }; + + vec![Error::new(span, msg.to_string())] + } + _ => self + .children() + .filter(|red| red.green.erroneous()) + .flat_map(|red| red.errors()) + .collect(), + } + } + + /// Convert the node to a typed AST node. + pub fn cast(self) -> Option + where + T: TypedNode, + { + T::from_red(self) + } + + /// The node's children. + pub fn children(self) -> Children<'a> { + let children = match &self.green { + Green::Node(node) => node.children(), + Green::Token(_) => &[], + }; + + Children { + id: self.id, + iter: children.iter(), + front: self.offset, + back: self.offset + self.len(), + } + } + + /// Get the first child that can cast to some AST type. + pub fn cast_first_child(self) -> Option { + self.children().find_map(RedRef::cast) + } + + /// Get the last child that can cast to some AST type. + pub fn cast_last_child(self) -> Option { + self.children().rev().find_map(RedRef::cast) + } +} + +impl Debug for RedRef<'_> { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{:?}: {:?}", self.kind(), self.span())?; + let mut children = self.children().peekable(); + if children.peek().is_some() { + f.write_str(" ")?; + f.debug_list().entries(children.map(RedRef::own)).finish()?; + } + Ok(()) + } +} + +/// An iterator over the children of a red node. 
+pub struct Children<'a> { + id: SourceId, + iter: std::slice::Iter<'a, Green>, + front: usize, + back: usize, +} + +impl<'a> Iterator for Children<'a> { + type Item = RedRef<'a>; + + fn next(&mut self) -> Option { + self.iter.next().map(|green| { + let offset = self.front; + self.front += green.len(); + RedRef { id: self.id, offset, green } + }) + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl DoubleEndedIterator for Children<'_> { + fn next_back(&mut self) -> Option { + self.iter.next_back().map(|green| { + self.back -= green.len(); + RedRef { id: self.id, offset: self.back, green } + }) + } +} + +impl ExactSizeIterator for Children<'_> {} + /// All syntactical building blocks that can be part of a Typst document. /// /// Can be emitted as a token by the tokenizer or as part of a green node by @@ -533,7 +550,7 @@ pub enum NodeKind { Array, /// A dictionary expression: `(thickness: 3pt, pattern: dashed)`. Dict, - /// A named argument: `thickness: 3pt`. + /// A named pair: `thickness: 3pt`. Named, /// A grouped expression: `(1 + 2)`. Group, @@ -582,12 +599,12 @@ pub enum NodeKind { /// The comment can contain nested block comments. BlockComment, /// Tokens that appear in the wrong place. - Error(ErrorPosition, EcoString), + Error(ErrorPos, EcoString), /// Unknown character sequences. Unknown(EcoString), } -/// Payload of a raw block: `` `...` ``. +/// Payload of a raw block node. #[derive(Debug, Clone, PartialEq)] pub struct RawData { /// The raw text in the block. @@ -600,19 +617,19 @@ pub struct RawData { pub block: bool, } -/// Payload of a math formula: `$2pi + x$` or `$[f'(x) = x^2]$`. +/// Payload of a math formula node. #[derive(Debug, Clone, PartialEq)] pub struct MathData { - /// The formula between the dollars. + /// The formula between the dollars / brackets. pub formula: EcoString, /// Whether the formula is display-level, that is, it is surrounded by - /// `$[..]`. + /// `$[..]$`. 
pub display: bool, } /// Where in a node an error should be annotated. #[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum ErrorPosition { +pub enum ErrorPos { /// At the start of the node. Start, /// Over the full width of the node. diff --git a/src/syntax/pretty.rs b/src/syntax/pretty.rs index b396a39c7..fa423e94b 100644 --- a/src/syntax/pretty.rs +++ b/src/syntax/pretty.rs @@ -141,7 +141,7 @@ impl Pretty for RawNode { // Language tag. if let Some(lang) = &self.lang { - lang.pretty(p); + p.push_str(lang); } // Start untrimming. @@ -492,7 +492,7 @@ impl Pretty for Imports { fn pretty(&self, p: &mut Printer) { match self { Self::Wildcard => p.push('*'), - Self::Idents(idents) => { + Self::Items(idents) => { p.join(idents, ", ", |item, p| item.pretty(p)); } } @@ -508,7 +508,7 @@ impl Pretty for IncludeExpr { impl Pretty for Ident { fn pretty(&self, p: &mut Printer) { - p.push_str(self.as_str()); + p.push_str(&self.string); } } From 75fffc1f9b6ef8bf258b2b1845a4ba74a0f5f2c1 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Sun, 7 Nov 2021 23:31:42 +0100 Subject: [PATCH 17/18] Fine-grained capturing --- src/eval/capture.rs | 173 ++++++++++++++++-- src/eval/mod.rs | 63 +++---- src/eval/scope.rs | 4 +- src/geom/relative.rs | 2 +- src/parse/mod.rs | 2 +- src/source.rs | 2 +- src/syntax/ast.rs | 350 ++++++++++++++++++------------------- src/syntax/mod.rs | 15 +- src/syntax/pretty.rs | 26 +-- src/syntax/span.rs | 5 + tests/typ/code/closure.typ | 46 +++++ 11 files changed, 443 insertions(+), 245 deletions(-) diff --git a/src/eval/capture.rs b/src/eval/capture.rs index 786da36e7..4e24bc908 100644 --- a/src/eval/capture.rs +++ b/src/eval/capture.rs @@ -1,32 +1,23 @@ use std::rc::Rc; -use super::{Scope, Scopes}; -use crate::syntax::{NodeKind, RedRef}; +use super::{Scope, Scopes, Value}; +use crate::syntax::ast::{ClosureParam, Expr, Ident, Imports, TypedNode}; +use crate::syntax::RedRef; /// A visitor that captures variable slots. 
pub struct CapturesVisitor<'a> {
     external: &'a Scopes<'a>,
+    internal: Scopes<'a>,
     captures: Scope,
 }
 
 impl<'a> CapturesVisitor<'a> {
     /// Create a new visitor for the given external scopes.
     pub fn new(external: &'a Scopes) -> Self {
-        Self { external, captures: Scope::new() }
-    }
-
-    pub fn visit(&mut self, node: RedRef) {
-        match node.kind() {
-            NodeKind::Ident(ident) => {
-                if let Some(slot) = self.external.get(ident.as_str()) {
-                    self.captures.def_slot(ident.as_str(), Rc::clone(slot));
-                }
-            }
-            _ => {
-                for child in node.children() {
-                    self.visit(child);
-                }
-            }
+        Self {
+            external,
+            internal: Scopes::new(None),
+            captures: Scope::new(),
         }
     }
 
@@ -34,4 +25,152 @@ impl<'a> CapturesVisitor<'a> {
     pub fn finish(self) -> Scope {
         self.captures
     }
+
+    /// Bind a new internal variable.
+    pub fn bind(&mut self, ident: Ident) {
+        self.internal.def_mut(ident.take(), Value::None);
+    }
+
+    /// Capture a variable if it isn't internal.
+    pub fn capture(&mut self, ident: Ident) {
+        if self.internal.get(&ident).is_none() {
+            if let Some(slot) = self.external.get(&ident) {
+                self.captures.def_slot(ident.take(), Rc::clone(slot));
+            }
+        }
+    }
+
+    /// Visit any node and collect all captured variables.
+    pub fn visit(&mut self, node: RedRef) {
+        match node.cast() {
+            // Every identifier is a potential variable that we need to capture.
+            // Identifiers that shouldn't count as captures because they
+            // actually bind a new name are handled further below (individually
+            // through the expressions that contain them).
+            Some(Expr::Ident(ident)) => self.capture(ident),
+
+            // A closure contains parameter bindings, which are bound before the
+            // body is evaluated. Care must be taken so that the default values
+            // of named parameters cannot access previous parameter bindings.
+ Some(Expr::Closure(expr)) => { + for param in expr.params() { + if let ClosureParam::Named(named) = param { + self.visit(named.expr().as_red()); + } + } + + for param in expr.params() { + match param { + ClosureParam::Pos(ident) => self.bind(ident), + ClosureParam::Named(named) => self.bind(named.name()), + ClosureParam::Sink(ident) => self.bind(ident), + } + } + + self.visit(expr.body().as_red()); + } + + // A let expression contains a binding, but that binding is only + // active after the body is evaluated. + Some(Expr::Let(expr)) => { + if let Some(init) = expr.init() { + self.visit(init.as_red()); + } + self.bind(expr.binding()); + } + + // A for loop contains one or two bindings in its pattern. These are + // active after the iterable is evaluated but before the body is + // evaluated. + Some(Expr::For(expr)) => { + self.visit(expr.iter().as_red()); + let pattern = expr.pattern(); + if let Some(key) = pattern.key() { + self.bind(key); + } + self.bind(pattern.value()); + self.visit(expr.body().as_red()); + } + + // An import contains items, but these are active only after the + // path is evaluated. + Some(Expr::Import(expr)) => { + self.visit(expr.path().as_red()); + if let Imports::Items(items) = expr.imports() { + for item in items { + self.bind(item); + } + } + } + + // Blocks and templates create a scope. + Some(Expr::Block(_) | Expr::Template(_)) => { + self.internal.enter(); + for child in node.children() { + self.visit(child); + } + self.internal.exit(); + } + + // Everything else is traversed from left to right. 
+ _ => { + for child in node.children() { + self.visit(child); + } + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::parse::parse; + use crate::source::SourceId; + use crate::syntax::RedNode; + + #[track_caller] + fn test(src: &str, result: &[&str]) { + let green = parse(src); + let red = RedNode::from_root(green, SourceId::from_raw(0)); + + let mut scopes = Scopes::new(None); + scopes.def_const("x", 0); + scopes.def_const("y", 0); + scopes.def_const("z", 0); + + let mut visitor = CapturesVisitor::new(&scopes); + visitor.visit(red.as_ref()); + + let captures = visitor.finish(); + let mut names: Vec<_> = captures.iter().map(|(k, _)| k).collect(); + names.sort(); + + assert_eq!(names, result); + } + + #[test] + fn test_captures() { + // Let binding and function definition. + test("#let x = x", &["x"]); + test("#let x; {x + y}", &["y"]); + test("#let f(x, y) = x + y", &[]); + + // Closure with different kinds of params. + test("{(x, y) => x + z}", &["z"]); + test("{(x: y, z) => x + z}", &["y"]); + test("{(..x) => x + y}", &["y"]); + test("{(x, y: x + z) => x + y}", &["x", "z"]); + + // For loop. + test("#for x in y { x + z }", &["y", "z"]); + test("#for x, y in y { x + y }", &["y"]); + + // Import. + test("#import x, y from z", &["z"]); + test("#import x, y, z from x + y", &["x", "y"]); + + // Scoping. 
+ test("{ let x = 1; { let y = 2; y }; x + y }", &["y"]); + } } diff --git a/src/eval/mod.rs b/src/eval/mod.rs index 7c984691a..540b58b98 100644 --- a/src/eval/mod.rs +++ b/src/eval/mod.rs @@ -174,8 +174,8 @@ impl Eval for Expr { fn eval(&self, ctx: &mut EvalContext) -> TypResult { match self { - Self::Ident(v) => v.eval(ctx), Self::Lit(v) => v.eval(ctx), + Self::Ident(v) => v.eval(ctx), Self::Array(v) => v.eval(ctx).map(Value::Array), Self::Dict(v) => v.eval(ctx).map(Value::Dict), Self::Template(v) => v.eval(ctx).map(Value::Template), @@ -200,17 +200,17 @@ impl Eval for Lit { type Output = Value; fn eval(&self, _: &mut EvalContext) -> TypResult { - Ok(match *self { - Self::None(_) => Value::None, - Self::Auto(_) => Value::Auto, - Self::Bool(_, v) => Value::Bool(v), - Self::Int(_, v) => Value::Int(v), - Self::Float(_, v) => Value::Float(v), - Self::Length(_, v, unit) => Value::Length(Length::with_unit(v, unit)), - Self::Angle(_, v, unit) => Value::Angle(Angle::with_unit(v, unit)), - Self::Percent(_, v) => Value::Relative(Relative::new(v / 100.0)), - Self::Fractional(_, v) => Value::Fractional(Fractional::new(v)), - Self::Str(_, ref v) => Value::Str(v.into()), + Ok(match self.kind() { + LitKind::None => Value::None, + LitKind::Auto => Value::Auto, + LitKind::Bool(v) => Value::Bool(v), + LitKind::Int(v) => Value::Int(v), + LitKind::Float(v) => Value::Float(v), + LitKind::Length(v, unit) => Value::Length(Length::with_unit(v, unit)), + LitKind::Angle(v, unit) => Value::Angle(Angle::with_unit(v, unit)), + LitKind::Percent(v) => Value::Relative(Relative::new(v / 100.0)), + LitKind::Fractional(v) => Value::Fractional(Fractional::new(v)), + LitKind::Str(ref v) => Value::Str(v.into()), }) } } @@ -219,9 +219,9 @@ impl Eval for Ident { type Output = Value; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - match ctx.scopes.get(&self.string) { + match ctx.scopes.get(self) { Some(slot) => Ok(slot.borrow().clone()), - None => bail!(self.span, "unknown variable"), + None => 
bail!(self.span(), "unknown variable"), } } } @@ -239,7 +239,7 @@ impl Eval for DictExpr { fn eval(&self, ctx: &mut EvalContext) -> TypResult { self.items() - .map(|x| Ok((x.name().string.into(), x.expr().eval(ctx)?))) + .map(|x| Ok((x.name().take().into(), x.expr().eval(ctx)?))) .collect() } } @@ -401,7 +401,7 @@ impl Eval for CallArgs { CallArg::Named(x) => { items.push(Arg { span, - name: Some(x.name().string.into()), + name: Some(x.name().take().into()), value: Spanned::new(x.expr().eval(ctx)?, x.expr().span()), }); } @@ -457,23 +457,23 @@ impl Eval for ClosureExpr { for param in self.params() { match param { ClosureParam::Pos(name) => { - params.push((name.string, None)); + params.push((name.take(), None)); } ClosureParam::Named(named) => { - params.push((named.name().string, Some(named.expr().eval(ctx)?))); + params.push((named.name().take(), Some(named.expr().eval(ctx)?))); } ClosureParam::Sink(name) => { if sink.is_some() { - bail!(name.span, "only one argument sink is allowed"); + bail!(name.span(), "only one argument sink is allowed"); } - sink = Some(name.string); + sink = Some(name.take()); } } } // Clone the body expression so that we don't have a lifetime // dependence on the AST. - let name = self.name().map(|name| name.string); + let name = self.name().map(Ident::take); let body = self.body(); // Define the actual function. 
@@ -533,7 +533,7 @@ impl Eval for LetExpr { Some(expr) => expr.eval(ctx)?, None => Value::None, }; - ctx.scopes.def_mut(self.binding().string, value); + ctx.scopes.def_mut(self.binding().take(), value); Ok(Value::None) } } @@ -589,7 +589,7 @@ impl Eval for ForExpr { #[allow(unused_parens)] for ($($value),*) in $iter { - $(ctx.scopes.def_mut(&$binding.string, $value);)* + $(ctx.scopes.def_mut(&$binding, $value);)* let value = self.body().eval(ctx)?; output = ops::join(output, value) @@ -603,7 +603,10 @@ impl Eval for ForExpr { let iter = self.iter().eval(ctx)?; let pattern = self.pattern(); - match (pattern.key(), pattern.value(), iter) { + let key = pattern.key().map(Ident::take); + let value = pattern.value().take(); + + match (key, value, iter) { (None, v, Value::Str(string)) => iter!(for (v => value) in string.iter()), (None, v, Value::Array(array)) => { iter!(for (v => value) in array.into_iter()) @@ -644,10 +647,10 @@ impl Eval for ImportExpr { } Imports::Items(idents) => { for ident in idents { - if let Some(slot) = module.scope.get(&ident.string) { - ctx.scopes.def_mut(ident.string, slot.borrow().clone()); + if let Some(slot) = module.scope.get(&ident) { + ctx.scopes.def_mut(ident.take(), slot.borrow().clone()); } else { - bail!(ident.span, "unresolved import"); + bail!(ident.span(), "unresolved import"); } } } @@ -691,12 +694,12 @@ impl Access for Expr { impl Access for Ident { fn access<'a>(&self, ctx: &'a mut EvalContext) -> TypResult> { - match ctx.scopes.get(&self.string) { + match ctx.scopes.get(self) { Some(slot) => match slot.try_borrow_mut() { Ok(guard) => Ok(guard), - Err(_) => bail!(self.span, "cannot mutate a constant"), + Err(_) => bail!(self.span(), "cannot mutate a constant"), }, - None => bail!(self.span, "unknown variable"), + None => bail!(self.span(), "unknown variable"), } } } diff --git a/src/eval/scope.rs b/src/eval/scope.rs index eb057ae3b..2290affdb 100644 --- a/src/eval/scope.rs +++ b/src/eval/scope.rs @@ -120,6 +120,8 @@ impl Scope 
{ impl Debug for Scope { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - self.values.fmt(f) + f.debug_map() + .entries(self.values.iter().map(|(k, v)| (k, v.borrow()))) + .finish() } } diff --git a/src/geom/relative.rs b/src/geom/relative.rs index 754aa6c85..e46c51de1 100644 --- a/src/geom/relative.rs +++ b/src/geom/relative.rs @@ -3,7 +3,7 @@ use super::*; /// A relative length. /// /// _Note_: `50%` is represented as `0.5` here, but stored as `50.0` in the -/// corresponding [literal](crate::syntax::ast::Lit::Percent). +/// corresponding [literal](crate::syntax::ast::LitKind::Percent). #[derive(Default, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] pub struct Relative(N64); diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 505482cad..78e4f896d 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -418,7 +418,7 @@ fn item(p: &mut Parser) -> ParseResult { if p.at(&NodeKind::Colon) { marker.perform(p, NodeKind::Named, |p| { - if matches!(marker.peek(p).unwrap().kind(), &NodeKind::Ident(_)) { + if let Some(NodeKind::Ident(_)) = marker.peek(p).map(|c| c.kind()) { p.eat(); expr(p) } else { diff --git a/src/source.rs b/src/source.rs index 46d6b84bb..713380c58 100644 --- a/src/source.rs +++ b/src/source.rs @@ -145,7 +145,7 @@ impl SourceFile { } pub fn ast(&self) -> TypResult { - let red = RedNode::new_root(self.root.clone(), self.id); + let red = RedNode::from_root(self.root.clone(), self.id); let errors = red.errors(); if errors.is_empty() { Ok(red.cast().unwrap()) diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index dc71e2295..067bd6daf 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -1,6 +1,8 @@ //! A typed layer over the red-green tree. 
-use super::{NodeKind, RedNode, RedRef, Span}; +use std::ops::Deref; + +use super::{Green, GreenData, NodeKind, RedNode, RedRef, Span}; use crate::geom::{AngularUnit, LengthUnit}; use crate::util::EcoString; @@ -8,13 +10,24 @@ use crate::util::EcoString; pub trait TypedNode: Sized { /// Convert from a red node to a typed node. fn from_red(value: RedRef) -> Option; + + /// A reference to the underlying red node. + fn as_red(&self) -> RedRef<'_>; + + /// The source code location. + fn span(&self) -> Span { + self.as_red().span() + } } macro_rules! node { ($(#[$attr:meta])* $name:ident) => { - node!{$(#[$attr])* $name => $name} + node!{$(#[$attr])* $name: $name} }; - ($(#[$attr:meta])* $variant:ident => $name:ident) => { + ($(#[$attr:meta])* $name:ident: $variant:ident) => { + node!{$(#[$attr])* $name: NodeKind::$variant} + }; + ($(#[$attr:meta])* $name:ident: $($variant:pat)|*) => { #[derive(Debug, Clone, PartialEq)] #[repr(transparent)] $(#[$attr])* @@ -22,22 +35,14 @@ macro_rules! node { impl TypedNode for $name { fn from_red(node: RedRef) -> Option { - if node.kind() != &NodeKind::$variant { - return None; + if matches!(node.kind(), $($variant)|*) { + Some(Self(node.own())) + } else { + None } - - Some(Self(node.own())) - } - } - - impl $name { - /// The source code location. - pub fn span(&self) -> Span { - self.0.span() } - /// The underlying red node. - pub fn as_red(&self) -> RedRef { + fn as_red(&self) -> RedRef<'_> { self.0.as_ref() } } @@ -52,7 +57,27 @@ node! { impl Markup { /// The markup nodes. 
pub fn nodes(&self) -> impl Iterator + '_ { - self.0.children().filter_map(RedRef::cast) + self.0.children().filter_map(|node| match node.kind() { + NodeKind::Space(_) => Some(MarkupNode::Space), + NodeKind::Linebreak => Some(MarkupNode::Linebreak), + NodeKind::Parbreak => Some(MarkupNode::Parbreak), + NodeKind::Strong => Some(MarkupNode::Strong), + NodeKind::Emph => Some(MarkupNode::Emph), + NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())), + NodeKind::UnicodeEscape(c) => Some(MarkupNode::Text((*c).into())), + NodeKind::EnDash => Some(MarkupNode::Text("\u{2013}".into())), + NodeKind::EmDash => Some(MarkupNode::Text("\u{2014}".into())), + NodeKind::NonBreakingSpace => Some(MarkupNode::Text("\u{00A0}".into())), + NodeKind::Raw(raw) => Some(MarkupNode::Raw(RawNode { + block: raw.block, + lang: raw.lang.clone(), + text: raw.text.clone(), + })), + NodeKind::Heading => node.cast().map(MarkupNode::Heading), + NodeKind::List => node.cast().map(MarkupNode::List), + NodeKind::Enum => node.cast().map(MarkupNode::Enum), + _ => node.cast().map(MarkupNode::Expr), + }) } } @@ -83,28 +108,6 @@ pub enum MarkupNode { Expr(Expr), } -impl TypedNode for MarkupNode { - fn from_red(node: RedRef) -> Option { - match node.kind() { - NodeKind::Space(_) => Some(MarkupNode::Space), - NodeKind::Linebreak => Some(MarkupNode::Linebreak), - NodeKind::Parbreak => Some(MarkupNode::Parbreak), - NodeKind::Strong => Some(MarkupNode::Strong), - NodeKind::Emph => Some(MarkupNode::Emph), - NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())), - NodeKind::UnicodeEscape(c) => Some(MarkupNode::Text((*c).into())), - NodeKind::EnDash => Some(MarkupNode::Text("\u{2013}".into())), - NodeKind::EmDash => Some(MarkupNode::Text("\u{2014}".into())), - NodeKind::NonBreakingSpace => Some(MarkupNode::Text("\u{00A0}".into())), - NodeKind::Raw(_) => node.cast().map(MarkupNode::Raw), - NodeKind::Heading => node.cast().map(MarkupNode::Heading), - NodeKind::List => node.cast().map(MarkupNode::List), - 
NodeKind::Enum => node.cast().map(MarkupNode::Enum), - _ => node.cast().map(MarkupNode::Expr), - } - } -} - /// A raw block with optional syntax highlighting: `` `...` ``. #[derive(Debug, Clone, PartialEq)] pub struct RawNode { @@ -118,22 +121,9 @@ pub struct RawNode { pub block: bool, } -impl TypedNode for RawNode { - fn from_red(node: RedRef) -> Option { - match node.kind() { - NodeKind::Raw(raw) => Some(Self { - block: raw.block, - lang: raw.lang.clone(), - text: raw.text.clone(), - }), - _ => None, - } - } -} - node! { /// A section heading: `= Introduction`. - Heading => HeadingNode + HeadingNode: Heading } impl HeadingNode { @@ -154,7 +144,7 @@ impl HeadingNode { node! { /// An item in an unordered list: `- ...`. - List => ListNode + ListNode: List } impl ListNode { @@ -166,7 +156,7 @@ impl ListNode { node! { /// An item in an enumeration (ordered list): `1. ...`. - Enum => EnumNode + EnumNode: Enum } impl EnumNode { @@ -190,10 +180,10 @@ impl EnumNode { /// An expression. #[derive(Debug, Clone, PartialEq)] pub enum Expr { - /// An identifier: `left`. - Ident(Ident), /// A literal: `1`, `true`, ... Lit(Lit), + /// An identifier: `left`. + Ident(Ident), /// An array expression: `(1, "hi", 12cm)`. Array(ArrayExpr), /// A dictionary expression: `(thickness: 3pt, pattern: dashed)`. 
@@ -251,6 +241,29 @@ impl TypedNode for Expr { _ => node.cast().map(Self::Lit), } } + + fn as_red(&self) -> RedRef<'_> { + match self { + Self::Lit(v) => v.as_red(), + Self::Ident(v) => v.as_red(), + Self::Array(v) => v.as_red(), + Self::Dict(v) => v.as_red(), + Self::Template(v) => v.as_red(), + Self::Group(v) => v.as_red(), + Self::Block(v) => v.as_red(), + Self::Unary(v) => v.as_red(), + Self::Binary(v) => v.as_red(), + Self::Call(v) => v.as_red(), + Self::Closure(v) => v.as_red(), + Self::With(v) => v.as_red(), + Self::Let(v) => v.as_red(), + Self::If(v) => v.as_red(), + Self::While(v) => v.as_red(), + Self::For(v) => v.as_red(), + Self::Import(v) => v.as_red(), + Self::Include(v) => v.as_red(), + } + } } impl Expr { @@ -267,99 +280,72 @@ impl Expr { | Self::Include(_) ) } +} - /// Return the expression's span. - pub fn span(&self) -> Span { - match self { - Self::Ident(ident) => ident.span, - Self::Lit(lit) => lit.span(), - Self::Array(array) => array.span(), - Self::Dict(dict) => dict.span(), - Self::Template(template) => template.span(), - Self::Group(group) => group.span(), - Self::Block(block) => block.span(), - Self::Unary(unary) => unary.span(), - Self::Binary(binary) => binary.span(), - Self::Call(call) => call.span(), - Self::Closure(closure) => closure.span(), - Self::With(with) => with.span(), - Self::Let(let_) => let_.span(), - Self::If(if_) => if_.span(), - Self::While(while_) => while_.span(), - Self::For(for_) => for_.span(), - Self::Import(import) => import.span(), - Self::Include(include) => include.span(), +node! { + /// A literal: `1`, `true`, ... + Lit: NodeKind::None + | NodeKind::Auto + | NodeKind::Bool(_) + | NodeKind::Int(_) + | NodeKind::Float(_) + | NodeKind::Length(_, _) + | NodeKind::Angle(_, _) + | NodeKind::Percentage(_) + | NodeKind::Fraction(_) + | NodeKind::Str(_) +} + +impl Lit { + /// The kind of literal. 
+ pub fn kind(&self) -> LitKind { + match *self.0.kind() { + NodeKind::None => LitKind::None, + NodeKind::Auto => LitKind::Auto, + NodeKind::Bool(v) => LitKind::Bool(v), + NodeKind::Int(v) => LitKind::Int(v), + NodeKind::Float(v) => LitKind::Float(v), + NodeKind::Length(v, unit) => LitKind::Length(v, unit), + NodeKind::Angle(v, unit) => LitKind::Angle(v, unit), + NodeKind::Percentage(v) => LitKind::Percent(v), + NodeKind::Fraction(v) => LitKind::Fractional(v), + NodeKind::Str(ref v) => LitKind::Str(v.clone()), + _ => panic!("literal is of wrong kind"), } } } -/// A literal: `1`, `true`, ... +/// The kind of a literal. #[derive(Debug, Clone, PartialEq)] -pub enum Lit { +pub enum LitKind { /// The none literal: `none`. - None(Span), + None, /// The auto literal: `auto`. - Auto(Span), + Auto, /// A boolean literal: `true`, `false`. - Bool(Span, bool), + Bool(bool), /// An integer literal: `120`. - Int(Span, i64), + Int(i64), /// A floating-point literal: `1.2`, `10e-4`. - Float(Span, f64), + Float(f64), /// A length literal: `12pt`, `3cm`. - Length(Span, f64, LengthUnit), + Length(f64, LengthUnit), /// An angle literal: `1.5rad`, `90deg`. - Angle(Span, f64, AngularUnit), + Angle(f64, AngularUnit), /// A percent literal: `50%`. /// /// _Note_: `50%` is stored as `50.0` here, but as `0.5` in the /// corresponding [value](crate::geom::Relative). - Percent(Span, f64), + Percent(f64), /// A fraction unit literal: `1fr`. - Fractional(Span, f64), + Fractional(f64), /// A string literal: `"hello!"`. 
- Str(Span, EcoString), -} - -impl TypedNode for Lit { - fn from_red(node: RedRef) -> Option { - match *node.kind() { - NodeKind::None => Some(Self::None(node.span())), - NodeKind::Auto => Some(Self::Auto(node.span())), - NodeKind::Bool(v) => Some(Self::Bool(node.span(), v)), - NodeKind::Int(v) => Some(Self::Int(node.span(), v)), - NodeKind::Float(v) => Some(Self::Float(node.span(), v)), - NodeKind::Length(v, unit) => Some(Self::Length(node.span(), v, unit)), - NodeKind::Angle(v, unit) => Some(Self::Angle(node.span(), v, unit)), - NodeKind::Percentage(v) => Some(Self::Percent(node.span(), v)), - NodeKind::Fraction(v) => Some(Self::Fractional(node.span(), v)), - NodeKind::Str(ref v) => Some(Self::Str(node.span(), v.clone())), - _ => None, - } - } -} - -impl Lit { - /// The source code location. - pub fn span(&self) -> Span { - match *self { - Self::None(span) => span, - Self::Auto(span) => span, - Self::Bool(span, _) => span, - Self::Int(span, _) => span, - Self::Float(span, _) => span, - Self::Length(span, _, _) => span, - Self::Angle(span, _, _) => span, - Self::Percent(span, _) => span, - Self::Fractional(span, _) => span, - Self::Str(span, _) => span, - } - } + Str(EcoString), } node! { /// An array expression: `(1, "hi", 12cm)`. - Array => ArrayExpr + ArrayExpr: Array } impl ArrayExpr { @@ -371,7 +357,7 @@ impl ArrayExpr { node! { /// A dictionary expression: `(thickness: 3pt, pattern: dashed)`. - Dict => DictExpr + DictExpr: Dict } impl DictExpr { @@ -400,7 +386,7 @@ impl Named { node! { /// A template expression: `[*Hi* there!]`. - Template => TemplateExpr + TemplateExpr: Template } impl TemplateExpr { @@ -412,7 +398,7 @@ impl TemplateExpr { node! { /// A grouped expression: `(1 + 2)`. - Group => GroupExpr + GroupExpr: Group } impl GroupExpr { @@ -424,7 +410,7 @@ impl GroupExpr { node! { /// A block expression: `{ let x = 1; x + 2 }`. - Block => BlockExpr + BlockExpr: Block } impl BlockExpr { @@ -436,14 +422,15 @@ impl BlockExpr { node! 
{ /// A unary operation: `-x`. - Unary => UnaryExpr + UnaryExpr: Unary } impl UnaryExpr { /// The operator: `-`. pub fn op(&self) -> UnOp { self.0 - .cast_first_child() + .children() + .find_map(|node| UnOp::from_token(node.kind())) .expect("unary expression is missing operator") } @@ -464,12 +451,6 @@ pub enum UnOp { Not, } -impl TypedNode for UnOp { - fn from_red(node: RedRef) -> Option { - Self::from_token(node.kind()) - } -} - impl UnOp { /// Try to convert the token into a unary operation. pub fn from_token(token: &NodeKind) -> Option { @@ -501,14 +482,15 @@ impl UnOp { node! { /// A binary operation: `a + b`. - Binary => BinaryExpr + BinaryExpr: Binary } impl BinaryExpr { /// The binary operator: `+`. pub fn op(&self) -> BinOp { self.0 - .cast_first_child() + .children() + .find_map(|node| BinOp::from_token(node.kind())) .expect("binary expression is missing operator") } @@ -566,12 +548,6 @@ pub enum BinOp { DivAssign, } -impl TypedNode for BinOp { - fn from_red(node: RedRef) -> Option { - Self::from_token(node.kind()) - } -} - impl BinOp { /// Try to convert the token into a binary operation. pub fn from_token(token: &NodeKind) -> Option { @@ -671,7 +647,7 @@ pub enum Associativity { node! { /// An invocation of a function: `foo(...)`. - Call => CallExpr + CallExpr: Call } impl CallExpr { @@ -717,6 +693,14 @@ impl TypedNode for CallArg { _ => node.cast().map(CallArg::Pos), } } + + fn as_red(&self) -> RedRef<'_> { + match self { + Self::Pos(v) => v.as_red(), + Self::Named(v) => v.as_red(), + Self::Spread(v) => v.as_red(), + } + } } impl CallArg { @@ -732,7 +716,7 @@ impl CallArg { node! { /// A closure expression: `(x, y) => z`. - Closure => ClosureExpr + ClosureExpr: Closure } impl ClosureExpr { @@ -779,6 +763,14 @@ impl TypedNode for ClosureParam { _ => None, } } + + fn as_red(&self) -> RedRef<'_> { + match self { + Self::Pos(v) => v.as_red(), + Self::Named(v) => v.as_red(), + Self::Sink(v) => v.as_red(), + } + } } node! { @@ -840,7 +832,17 @@ node! 
{ impl ImportExpr { /// The items to be imported. pub fn imports(&self) -> Imports { - self.0.cast_first_child().expect("import is missing items") + self.0 + .children() + .find_map(|node| match node.kind() { + NodeKind::Star => Some(Imports::Wildcard), + NodeKind::ImportItems => { + let items = node.children().filter_map(RedRef::cast).collect(); + Some(Imports::Items(items)) + } + _ => None, + }) + .expect("import is missing items") } /// The location of the importable file. @@ -858,19 +860,6 @@ pub enum Imports { Items(Vec), } -impl TypedNode for Imports { - fn from_red(node: RedRef) -> Option { - match node.kind() { - NodeKind::Star => Some(Imports::Wildcard), - NodeKind::ImportItems => { - let items = node.children().filter_map(RedRef::cast).collect(); - Some(Imports::Items(items)) - } - _ => None, - } - } -} - node! { /// An include expression: `include "chapter1.typ"`. IncludeExpr @@ -967,23 +956,28 @@ impl ForPattern { } } -/// An identifier. -#[derive(Debug, Clone, PartialEq)] -pub struct Ident { - /// The source code location. - pub span: Span, - /// The identifier string. - pub string: EcoString, +node! { + /// An identifier. + Ident: NodeKind::Ident(_) } -impl TypedNode for Ident { - fn from_red(node: RedRef) -> Option { - match node.kind() { - NodeKind::Ident(string) => Some(Ident { - span: node.span(), - string: string.clone(), - }), - _ => None, +impl Ident { + /// Take out the contained [`EcoString`]. + pub fn take(self) -> EcoString { + match self.0.green { + Green::Token(GreenData { kind: NodeKind::Ident(id), .. }) => id, + _ => panic!("identifier is of wrong kind"), + } + } +} + +impl Deref for Ident { + type Target = str; + + fn deref(&self) -> &Self::Target { + match &self.0.green { + Green::Token(GreenData { kind: NodeKind::Ident(id), .. 
}) => id, + _ => panic!("identifier is of wrong kind"), } } } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index fc05ad50c..0660d57b9 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -187,11 +187,20 @@ pub struct RedNode { } impl RedNode { - /// Create a new root node from a [`GreenNode`]. - pub fn new_root(root: Rc, id: SourceId) -> Self { + /// Create a new red node from a root [`GreenNode`]. + pub fn from_root(root: Rc, id: SourceId) -> Self { Self { id, offset: 0, green: root.into() } } + /// Create a new red node from a node kind and a span. + pub fn from_data(kind: NodeKind, span: Span) -> Self { + Self { + id: span.source, + offset: span.start, + green: Green::Token(GreenData { kind, len: span.len() }), + } + } + /// Convert to a borrowed representation. pub fn as_ref(&self) -> RedRef<'_> { RedRef { @@ -540,7 +549,7 @@ pub enum NodeKind { /// A percentage: `50%`. /// /// _Note_: `50%` is stored as `50.0` here, as in the corresponding - /// [literal](ast::Lit::Percent). + /// [literal](ast::LitKind::Percent). Percentage(f64), /// A fraction unit: `3fr`. 
Fraction(f64), diff --git a/src/syntax/pretty.rs b/src/syntax/pretty.rs index fa423e94b..9e4510b62 100644 --- a/src/syntax/pretty.rs +++ b/src/syntax/pretty.rs @@ -198,8 +198,8 @@ impl Pretty for EnumNode { impl Pretty for Expr { fn pretty(&self, p: &mut Printer) { match self { - Self::Ident(v) => v.pretty(p), Self::Lit(v) => v.pretty(p), + Self::Ident(v) => v.pretty(p), Self::Array(v) => v.pretty(p), Self::Dict(v) => v.pretty(p), Self::Template(v) => v.pretty(p), @@ -222,17 +222,17 @@ impl Pretty for Expr { impl Pretty for Lit { fn pretty(&self, p: &mut Printer) { - match self { - Self::None(_) => p.push_str("none"), - Self::Auto(_) => p.push_str("auto"), - Self::Bool(_, v) => write!(p, "{}", v).unwrap(), - Self::Int(_, v) => write!(p, "{}", v).unwrap(), - Self::Float(_, v) => write!(p, "{}", v).unwrap(), - Self::Length(_, v, u) => write!(p, "{}{:?}", v, u).unwrap(), - Self::Angle(_, v, u) => write!(p, "{}{:?}", v, u).unwrap(), - Self::Percent(_, v) => write!(p, "{}%", v).unwrap(), - Self::Fractional(_, v) => write!(p, "{}fr", v).unwrap(), - Self::Str(_, v) => write!(p, "{:?}", v).unwrap(), + match self.kind() { + LitKind::None => p.push_str("none"), + LitKind::Auto => p.push_str("auto"), + LitKind::Bool(v) => write!(p, "{}", v).unwrap(), + LitKind::Int(v) => write!(p, "{}", v).unwrap(), + LitKind::Float(v) => write!(p, "{}", v).unwrap(), + LitKind::Length(v, u) => write!(p, "{}{:?}", v, u).unwrap(), + LitKind::Angle(v, u) => write!(p, "{}{:?}", v, u).unwrap(), + LitKind::Percent(v) => write!(p, "{}%", v).unwrap(), + LitKind::Fractional(v) => write!(p, "{}fr", v).unwrap(), + LitKind::Str(v) => write!(p, "{:?}", v).unwrap(), } } } @@ -508,7 +508,7 @@ impl Pretty for IncludeExpr { impl Pretty for Ident { fn pretty(&self, p: &mut Printer) { - p.push_str(&self.string); + p.push_str(self); } } diff --git a/src/syntax/span.rs b/src/syntax/span.rs index c26011bdb..47d965898 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -88,6 +88,11 @@ impl Span { Self { 
end, ..self } } + /// The byte length of the spanned region. + pub fn len(self) -> usize { + self.end - self.start + } + /// A new span at the position of this span's start. pub fn at_start(&self) -> Span { Self::at(self.source, self.start) diff --git a/tests/typ/code/closure.typ b/tests/typ/code/closure.typ index 3b8b42619..14e74e7ef 100644 --- a/tests/typ/code/closure.typ +++ b/tests/typ/code/closure.typ @@ -56,6 +56,52 @@ test(f(), 3) } +--- +// Import bindings. +{ + let b = "target.typ" + let f() = { + import b from b + b + } + test(f(), 1) +} + +--- +// For loop bindings. +{ + let v = (1, 2, 3) + let s = 0 + let f() = { + for v in v { s += v } + } + f() + test(s, 6) +} + +--- +// Let + closure bindings. +{ + let g = "hi" + let f() = { + let g() = "bye" + g() + } + test(f(), "bye") +} + +--- +// Parameter bindings. +{ + let x = 5 + let g() = { + let f(x, y: x) = x + y + f + } + + test(g()(8), 13) +} + --- // Don't leak environment. { From 38c5c362419c5eee7a4fdc0b43d3a9dfb339a6d2 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Mon, 8 Nov 2021 12:13:32 +0100 Subject: [PATCH 18/18] Final touches --- Cargo.toml | 3 +- benches/oneshot.rs | 4 +-- src/eval/capture.rs | 1 + src/eval/mod.rs | 64 +++++++++++++++++--------------------- src/eval/walk.rs | 17 ++++++++++ src/parse/mod.rs | 28 ++++++++--------- src/parse/parser.rs | 30 +++++++++--------- src/parse/resolve.rs | 12 +++---- src/parse/tokens.rs | 52 +++++++++++++++---------------- src/source.rs | 1 + src/syntax/ast.rs | 27 +++++++++------- src/syntax/mod.rs | 43 +++---------------------- src/syntax/pretty.rs | 64 +++++++++++++++++++++++--------------- tests/ref/markup/math.png | Bin 0 -> 2448 bytes tests/typ/code/array.typ | 2 +- tests/typ/code/call.typ | 2 +- tests/typ/code/dict.typ | 2 +- tests/typ/markup/math.typ | 12 +++++++ 18 files changed, 183 insertions(+), 181 deletions(-) create mode 100644 tests/ref/markup/math.png create mode 100644 tests/typ/markup/math.typ diff --git a/Cargo.toml b/Cargo.toml index 
6a5b72b99..c7fa703c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,11 +5,10 @@ authors = ["The Typst Project Developers"] edition = "2018" [features] -default = ["cli", "fs", "layout-cache", "parse-cache"] +default = ["cli", "fs", "layout-cache"] cli = ["anyhow", "codespan-reporting", "fs", "pico-args", "same-file"] fs = ["dirs", "memmap2", "same-file", "walkdir"] layout-cache = ["rand"] -parse-cache = [] [profile.dev] # Faster compilation diff --git a/benches/oneshot.rs b/benches/oneshot.rs index 63f201ac5..bb385688f 100644 --- a/benches/oneshot.rs +++ b/benches/oneshot.rs @@ -44,11 +44,11 @@ fn bench_scan(iai: &mut Iai) { } fn bench_tokenize(iai: &mut Iai) { - iai.run(|| Tokens::new(black_box(&SRC), black_box(TokenMode::Markup)).count()); + iai.run(|| Tokens::new(black_box(SRC), black_box(TokenMode::Markup)).count()); } fn bench_parse(iai: &mut Iai) { - iai.run(|| parse(&SRC)); + iai.run(|| parse(SRC)); } fn bench_eval(iai: &mut Iai) { diff --git a/src/eval/capture.rs b/src/eval/capture.rs index 4e24bc908..e47831dfd 100644 --- a/src/eval/capture.rs +++ b/src/eval/capture.rs @@ -172,5 +172,6 @@ mod tests { // Scoping. test("{ let x = 1; { let y = 2; y }; x + y }", &["y"]); + test("[#let x = 1]#x", &["x"]); } } diff --git a/src/eval/mod.rs b/src/eval/mod.rs index 540b58b98..fda2184e6 100644 --- a/src/eval/mod.rs +++ b/src/eval/mod.rs @@ -398,11 +398,11 @@ impl Eval for CallArgs { value: Spanned::new(expr.eval(ctx)?, expr.span()), }); } - CallArg::Named(x) => { + CallArg::Named(named) => { items.push(Arg { span, - name: Some(x.name().take().into()), - value: Spanned::new(x.expr().eval(ctx)?, x.expr().span()), + name: Some(named.name().take().into()), + value: Spanned::new(named.expr().eval(ctx)?, named.expr().span()), }); } CallArg::Spread(expr) => match expr.eval(ctx)? 
{ @@ -511,8 +511,8 @@ impl Eval for WithExpr { type Output = Value; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - let wrapped = - self.callee().eval(ctx)?.cast::().at(self.callee().span())?; + let callee = self.callee(); + let wrapped = callee.eval(ctx)?.cast::().at(callee.span())?; let applied = self.args().eval(ctx)?; let name = wrapped.name().cloned(); @@ -529,7 +529,7 @@ impl Eval for LetExpr { type Output = Value; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - let value = match &self.init() { + let value = match self.init() { Some(expr) => expr.eval(ctx)?, None => Value::None, }; @@ -542,15 +542,10 @@ impl Eval for IfExpr { type Output = Value; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - let condition = self - .condition() - .eval(ctx)? - .cast::() - .at(self.condition().span())?; - - if condition { + let condition = self.condition(); + if condition.eval(ctx)?.cast::().at(condition.span())? { self.if_body().eval(ctx) - } else if let Some(else_body) = &self.else_body() { + } else if let Some(else_body) = self.else_body() { else_body.eval(ctx) } else { Ok(Value::None) @@ -564,14 +559,11 @@ impl Eval for WhileExpr { fn eval(&self, ctx: &mut EvalContext) -> TypResult { let mut output = Value::None; - while self - .condition() - .eval(ctx)? - .cast::() - .at(self.condition().span())? - { - let value = self.body().eval(ctx)?; - output = ops::join(output, value).at(self.body().span())?; + let condition = self.condition(); + while condition.eval(ctx)?.cast::().at(condition.span())? 
{ + let body = self.body(); + let value = body.eval(ctx)?; + output = ops::join(output, value).at(body.span())?; } Ok(output) @@ -597,7 +589,7 @@ impl Eval for ForExpr { } ctx.scopes.exit(); - Ok(output) + return Ok(output); }}; } @@ -607,18 +599,20 @@ impl Eval for ForExpr { let value = pattern.value().take(); match (key, value, iter) { - (None, v, Value::Str(string)) => iter!(for (v => value) in string.iter()), + (None, v, Value::Str(string)) => { + iter!(for (v => value) in string.iter()); + } (None, v, Value::Array(array)) => { - iter!(for (v => value) in array.into_iter()) + iter!(for (v => value) in array.into_iter()); } (Some(i), v, Value::Array(array)) => { - iter!(for (i => idx, v => value) in array.into_iter().enumerate()) + iter!(for (i => idx, v => value) in array.into_iter().enumerate()); } (None, v, Value::Dict(dict)) => { - iter!(for (v => value) in dict.into_iter().map(|p| p.1)) + iter!(for (v => value) in dict.into_iter().map(|p| p.1)); } (Some(k), v, Value::Dict(dict)) => { - iter!(for (k => key, v => value) in dict.into_iter()) + iter!(for (k => key, v => value) in dict.into_iter()); } (_, _, Value::Str(_)) => { bail!(pattern.span(), "mismatched pattern"); @@ -634,9 +628,9 @@ impl Eval for ImportExpr { type Output = Value; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - let path = self.path().eval(ctx)?.cast::().at(self.path().span())?; - - let file = ctx.import(&path, self.path().span())?; + let path = self.path(); + let resolved = path.eval(ctx)?.cast::().at(path.span())?; + let file = ctx.import(&resolved, path.span())?; let module = &ctx.modules[&file]; match self.imports() { @@ -664,12 +658,10 @@ impl Eval for IncludeExpr { type Output = Value; fn eval(&self, ctx: &mut EvalContext) -> TypResult { - let path_node = self.path(); - let path = path_node.eval(ctx)?.cast::().at(path_node.span())?; - - let file = ctx.import(&path, path_node.span())?; + let path = self.path(); + let resolved = path.eval(ctx)?.cast::().at(path.span())?; + let 
file = ctx.import(&resolved, path.span())?; let module = &ctx.modules[&file]; - Ok(Value::Template(module.template.clone())) } } diff --git a/src/eval/walk.rs b/src/eval/walk.rs index 1656929b8..aab32f406 100644 --- a/src/eval/walk.rs +++ b/src/eval/walk.rs @@ -33,6 +33,7 @@ impl Walk for MarkupNode { Self::Emph => ctx.template.modify(|s| s.text_mut().emph.flip()), Self::Text(text) => ctx.template.text(text), Self::Raw(raw) => raw.walk(ctx)?, + Self::Math(math) => math.walk(ctx)?, Self::Heading(heading) => heading.walk(ctx)?, Self::List(list) => list.walk(ctx)?, Self::Enum(enum_) => enum_.walk(ctx)?, @@ -67,6 +68,22 @@ impl Walk for RawNode { } } +impl Walk for MathNode { + fn walk(&self, ctx: &mut EvalContext) -> TypResult<()> { + if self.display { + ctx.template.parbreak(); + } + + ctx.template.monospace(self.formula.trim()); + + if self.display { + ctx.template.parbreak(); + } + + Ok(()) + } +} + impl Walk for HeadingNode { fn walk(&self, ctx: &mut EvalContext) -> TypResult<()> { let level = self.level(); diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 78e4f896d..f9c0049f0 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -16,8 +16,8 @@ use crate::syntax::ast::{Associativity, BinOp, UnOp}; use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind}; /// Parse a source file. 
-pub fn parse(source: &str) -> Rc { - let mut p = Parser::new(source); +pub fn parse(src: &str) -> Rc { + let mut p = Parser::new(src); markup(&mut p); match p.finish().into_iter().next() { Some(Green::Node(node)) => node, @@ -93,16 +93,17 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { | NodeKind::Strong | NodeKind::Linebreak | NodeKind::Raw(_) + | NodeKind::Math(_) | NodeKind::UnicodeEscape(_) => { p.eat(); } NodeKind::Eq if *at_start => heading(p), - NodeKind::ListBullet if *at_start => list_node(p), + NodeKind::Minus if *at_start => list_node(p), NodeKind::EnumNumbering(_) if *at_start => enum_node(p), // Line-based markup that is not currently at the start of the line. - NodeKind::Eq | NodeKind::ListBullet | NodeKind::EnumNumbering(_) => { + NodeKind::Eq | NodeKind::Minus | NodeKind::EnumNumbering(_) => { p.convert(NodeKind::Text(p.peek_src().into())); } @@ -149,7 +150,7 @@ fn heading(p: &mut Parser) { /// Parse a single list item. fn list_node(p: &mut Parser) { p.perform(NodeKind::List, |p| { - p.eat_assert(&NodeKind::ListBullet); + p.eat_assert(&NodeKind::Minus); let column = p.column(p.prev_end()); markup_indented(p, column); }); @@ -193,10 +194,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { loop { // Exclamation mark, parenthesis or bracket means this is a function // call. - if matches!( - p.peek_direct(), - Some(NodeKind::LeftParen | NodeKind::LeftBracket) - ) { + if let Some(NodeKind::LeftParen | NodeKind::LeftBracket) = p.peek_direct() { call(p, marker)?; continue; } @@ -241,7 +239,6 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { match p.peek() { // Things that start with an identifier. Some(NodeKind::Ident(_)) => { - // Start closure params. let marker = p.marker(); p.eat(); @@ -364,9 +361,10 @@ enum CollectionKind { /// Returns the length of the collection and whether the literal contained any /// commas. 
fn collection(p: &mut Parser) -> (CollectionKind, usize) { - let mut items = 0; let mut kind = CollectionKind::Positional; + let mut items = 0; let mut can_group = true; + let mut error = false; let mut missing_coma: Option = None; while !p.eof() { @@ -393,12 +391,14 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { if p.eat_if(&NodeKind::Comma) { can_group = false; } else { - missing_coma = Some(p.marker()); + missing_coma = Some(p.trivia_start()); } + } else { + error = true; } } - if can_group && items == 1 { + if error || (can_group && items == 1) { kind = CollectionKind::Group; } @@ -467,7 +467,7 @@ fn params(p: &mut Parser, marker: Marker) { NodeKind::Named | NodeKind::Comma | NodeKind::Ident(_) => Ok(()), NodeKind::Spread if matches!( - x.children().last().map(|x| x.kind()), + x.children().last().map(|child| child.kind()), Some(&NodeKind::Ident(_)) ) => { diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 5ebc2c17e..1c4c2a5c1 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -52,6 +52,17 @@ impl<'s> Parser<'s> { Marker(self.children.len()) } + /// Create a markup right before the trailing trivia. + pub fn trivia_start(&self) -> Marker { + let count = self + .children + .iter() + .rev() + .take_while(|node| self.is_trivia(node.kind())) + .count(); + Marker(self.children.len() - count) + } + /// Perform a subparse that wraps its result in a node with the given kind. pub fn perform(&mut self, kind: NodeKind, f: F) -> T where @@ -66,7 +77,7 @@ impl<'s> Parser<'s> { // Trailing trivia should not be wrapped into the new node. let idx = self.children.len(); self.children.push(Green::default()); - self.children.extend(children.drain(until ..)); + self.children.extend(children.drain(until.0 ..)); self.children[idx] = GreenNode::with_children(kind, children).into(); } else { self.children.push(GreenNode::with_children(kind, children).into()); @@ -238,7 +249,7 @@ impl<'s> Parser<'s> { // Rescan the peeked token if the mode changed. 
if rescan { if group_mode == TokenMode::Code { - self.children.truncate(self.trivia_start()); + self.children.truncate(self.trivia_start().0); } self.tokens.jump(self.prev_end()); @@ -290,17 +301,6 @@ impl<'s> Parser<'s> { } } - /// Find the index in the children list where trailing trivia starts. - fn trivia_start(&self) -> usize { - self.children.len() - - self - .children - .iter() - .rev() - .take_while(|node| self.is_trivia(node.kind())) - .count() - } - /// Whether the active group must end at a newline. fn stop_at_newline(&self) -> bool { matches!( @@ -350,7 +350,7 @@ impl Parser<'_> { /// Add an error that the `thing` was expected at the end of the last /// non-trivia token. pub fn expected_at(&mut self, thing: &str) { - Marker(self.trivia_start()).expected(self, thing); + self.trivia_start().expected(self, thing); } } @@ -374,7 +374,7 @@ impl Marker { /// with the given `kind`. pub fn end(self, p: &mut Parser, kind: NodeKind) { let until = p.trivia_start(); - let children = p.children.drain(self.0 .. until).collect(); + let children = p.children.drain(self.0 .. until.0).collect(); p.children .insert(self.0, GreenNode::with_children(kind, children).into()); } diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index 6719f41df..e15ae339d 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -1,5 +1,5 @@ use super::{is_ident, is_newline, Scanner}; -use crate::syntax::RawData; +use crate::syntax::ast::RawNode; use crate::util::EcoString; /// Resolve all escape sequences in a string. @@ -46,21 +46,19 @@ pub fn resolve_hex(sequence: &str) -> Option { } /// Resolve the language tag and trims the raw text. 
-pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawData { +pub fn resolve_raw(column: usize, backticks: usize, text: &str) -> RawNode { if backticks > 1 { let (tag, inner) = split_at_lang_tag(text); let (text, block) = trim_and_split_raw(column, inner); - RawData { + RawNode { lang: is_ident(tag).then(|| tag.into()), text: text.into(), - backticks, block, } } else { - RawData { + RawNode { lang: None, text: split_lines(text).join("\n").into(), - backticks, block: false, } } @@ -181,7 +179,7 @@ mod tests { #[track_caller] fn test( column: usize, - backticks: u8, + backticks: usize, raw: &str, lang: Option<&str>, text: &str, diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 1523cd643..96dfd9d15 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -5,7 +5,8 @@ use super::{ Scanner, }; use crate::geom::{AngularUnit, LengthUnit}; -use crate::syntax::*; +use crate::syntax::ast::{MathNode, RawNode}; +use crate::syntax::{ErrorPos, NodeKind}; use crate::util::EcoString; /// An iterator over the tokens of a string of source code. @@ -26,8 +27,8 @@ pub enum TokenMode { impl<'s> Tokens<'s> { /// Create a new token iterator with the given mode. #[inline] - pub fn new(source: &'s str, mode: TokenMode) -> Self { - Self { s: Scanner::new(source), mode } + pub fn new(src: &'s str, mode: TokenMode) -> Self { + Self { s: Scanner::new(src), mode } } /// Get the current token mode. 
@@ -254,7 +255,7 @@ impl<'s> Tokens<'s> { } } c if c.is_whitespace() => NodeKind::Linebreak, - _ => NodeKind::Text("\\".into()), + _ => NodeKind::Text('\\'.into()), }, None => NodeKind::Linebreak, } @@ -281,7 +282,7 @@ impl<'s> Tokens<'s> { NodeKind::EnDash } } else if self.s.check_or(true, char::is_whitespace) { - NodeKind::ListBullet + NodeKind::Minus } else { NodeKind::Text("-".into()) } @@ -310,16 +311,15 @@ impl<'s> Tokens<'s> { let column = self.s.column(self.s.index() - 1); let mut backticks = 1; - while self.s.eat_if('`') && backticks < u8::MAX { + while self.s.eat_if('`') { backticks += 1; } // Special case for empty inline block. if backticks == 2 { - return NodeKind::Raw(Rc::new(RawData { + return NodeKind::Raw(Rc::new(RawNode { text: EcoString::new(), lang: None, - backticks: 1, block: false, })); } @@ -389,7 +389,7 @@ impl<'s> Tokens<'s> { }; if terminated { - NodeKind::Math(Rc::new(MathData { + NodeKind::Math(Rc::new(MathNode { formula: self.s.get(start .. end).into(), display, })) @@ -429,9 +429,7 @@ impl<'s> Tokens<'s> { // Read the exponent. 
if self.s.eat_if('e') || self.s.eat_if('E') { - if !self.s.eat_if('+') { - self.s.eat_if('-'); - } + let _ = self.s.eat_if('+') || self.s.eat_if('-'); self.s.eat_while(|c| c.is_ascii_digit()); } @@ -483,6 +481,7 @@ impl<'s> Tokens<'s> { false } })); + if self.s.eat_if('"') { NodeKind::Str(string) } else { @@ -567,17 +566,16 @@ mod tests { NodeKind::Error(pos, message.into()) } - fn Raw(text: &str, lang: Option<&str>, backticks_left: u8, block: bool) -> NodeKind { - NodeKind::Raw(Rc::new(RawData { + fn Raw(text: &str, lang: Option<&str>, block: bool) -> NodeKind { + NodeKind::Raw(Rc::new(RawNode { text: text.into(), lang: lang.map(Into::into), - backticks: backticks_left, block, })) } fn Math(formula: &str, display: bool) -> NodeKind { - NodeKind::Math(Rc::new(MathData { formula: formula.into(), display })) + NodeKind::Math(Rc::new(MathNode { formula: formula.into(), display })) } fn Str(string: &str) -> NodeKind { @@ -655,13 +653,13 @@ mod tests { ]; // Test with each applicable suffix. - for (block, mode, suffix, token) in suffixes { + for &(block, mode, suffix, ref token) in suffixes { let src = $src; #[allow(unused_variables)] let blocks = BLOCKS; $(let blocks = $blocks;)? assert!(!blocks.contains(|c| !BLOCKS.contains(c))); - if (mode.is_none() || mode == &Some($mode)) && blocks.contains(*block) { + if (mode.is_none() || mode == Some($mode)) && blocks.contains(block) { t!(@$mode: format!("{}{}", src, suffix) => $($token,)* token); } } @@ -790,7 +788,7 @@ mod tests { t!(Markup: "~" => NonBreakingSpace); t!(Markup[" "]: r"\" => Linebreak); t!(Markup["a "]: r"a--" => Text("a"), EnDash); - t!(Markup["a1/"]: "- " => ListBullet, Space(0)); + t!(Markup["a1/"]: "- " => Minus, Space(0)); t!(Markup[" "]: "." => EnumNumbering(None)); t!(Markup[" "]: "1." => EnumNumbering(Some(1))); t!(Markup[" "]: "1.a" => Text("1."), Text("a")); @@ -867,22 +865,22 @@ mod tests { #[test] fn test_tokenize_raw_blocks() { // Test basic raw block. 
- t!(Markup: "``" => Raw("", None, 1, false)); - t!(Markup: "`raw`" => Raw("raw", None, 1, false)); + t!(Markup: "``" => Raw("", None, false)); + t!(Markup: "`raw`" => Raw("raw", None, false)); t!(Markup[""]: "`]" => Error(End, "expected 1 backtick")); // Test special symbols in raw block. - t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, false)); - t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, false), Error(End, "expected 1 backtick")); + t!(Markup: "`[brackets]`" => Raw("[brackets]", None, false)); + t!(Markup[""]: r"`\`` " => Raw(r"\", None, false), Error(End, "expected 1 backtick")); // Test separated closing backticks. - t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, false)); + t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), false)); // Test more backticks. - t!(Markup: "``nope``" => Raw("", None, 1, false), Text("nope"), Raw("", None, 1, false)); - t!(Markup: "````🚀````" => Raw("", None, 4, false)); + t!(Markup: "``nope``" => Raw("", None, false), Text("nope"), Raw("", None, false)); + t!(Markup: "````🚀````" => Raw("", None, false)); t!(Markup[""]: "`````👩‍🚀````noend" => Error(End, "expected 5 backticks")); - t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, false), Raw("", None, 1, false)); + t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), false), Raw("", None, false)); } #[test] diff --git a/src/source.rs b/src/source.rs index 713380c58..85f48491a 100644 --- a/src/source.rs +++ b/src/source.rs @@ -144,6 +144,7 @@ impl SourceFile { } } + /// The file's abstract syntax tree. 
pub fn ast(&self) -> TypResult { let red = RedNode::from_root(self.root.clone(), self.id); let errors = red.errors(); diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 067bd6daf..288c749a0 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -68,11 +68,8 @@ impl Markup { NodeKind::EnDash => Some(MarkupNode::Text("\u{2013}".into())), NodeKind::EmDash => Some(MarkupNode::Text("\u{2014}".into())), NodeKind::NonBreakingSpace => Some(MarkupNode::Text("\u{00A0}".into())), - NodeKind::Raw(raw) => Some(MarkupNode::Raw(RawNode { - block: raw.block, - lang: raw.lang.clone(), - text: raw.text.clone(), - })), + NodeKind::Math(math) => Some(MarkupNode::Math(math.as_ref().clone())), + NodeKind::Raw(raw) => Some(MarkupNode::Raw(raw.as_ref().clone())), NodeKind::Heading => node.cast().map(MarkupNode::Heading), NodeKind::List => node.cast().map(MarkupNode::List), NodeKind::Enum => node.cast().map(MarkupNode::Enum), @@ -98,6 +95,8 @@ pub enum MarkupNode { Text(EcoString), /// A raw block with optional syntax highlighting: `` `...` ``. Raw(RawNode), + /// A math formula: `$a^2 = b^2 + c^2$`. + Math(MathNode), /// A section heading: `= Introduction`. Heading(HeadingNode), /// An item in an unordered list: `- ...`. @@ -121,6 +120,16 @@ pub struct RawNode { pub block: bool, } +/// A math formula: `$a^2 + b^2 = c^2$`. +#[derive(Debug, Clone, PartialEq)] +pub struct MathNode { + /// The formula between the dollars / brackets. + pub formula: EcoString, + /// Whether the formula is display-level, that is, it is surrounded by + /// `$[..]$`. + pub display: bool, +} + node! { /// A section heading: `= Introduction`. HeadingNode: Heading @@ -133,12 +142,8 @@ impl HeadingNode { } /// The section depth (numer of equals signs). 
- pub fn level(&self) -> u8 { - self.0 - .children() - .filter(|n| n.kind() == &NodeKind::Eq) - .count() - .min(u8::MAX.into()) as u8 + pub fn level(&self) -> usize { + self.0.children().filter(|n| n.kind() == &NodeKind::Eq).count() } } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 0660d57b9..ca6ed2430 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -10,7 +10,7 @@ use std::rc::Rc; pub use pretty::*; pub use span::*; -use self::ast::TypedNode; +use self::ast::{MathNode, RawNode, TypedNode}; use crate::diag::Error; use crate::geom::{AngularUnit, LengthUnit}; use crate::source::SourceId; @@ -178,7 +178,7 @@ impl From for Green { /// A owned wrapper for a green node with span information. /// -/// Owned variant of [`RedRef`]. Can be [cast](Self::cast) to an AST nodes. +/// Owned variant of [`RedRef`]. Can be [cast](Self::cast) to an AST node. #[derive(Clone, PartialEq)] pub struct RedNode { id: SourceId, @@ -192,15 +192,6 @@ impl RedNode { Self { id, offset: 0, green: root.into() } } - /// Create a new red node from a node kind and a span. - pub fn from_data(kind: NodeKind, span: Span) -> Self { - Self { - id: span.source, - offset: span.start, - green: Green::Token(GreenData { kind, len: span.len() }), - } - } - /// Convert to a borrowed representation. pub fn as_ref(&self) -> RedRef<'_> { RedRef { @@ -527,13 +518,11 @@ pub enum NodeKind { EnumNumbering(Option), /// An item in an unordered list: `- ...`. List, - /// The bullet character of an item in an unordered list: `-`. - ListBullet, /// An arbitrary number of backticks followed by inner contents, terminated /// with the same number of backticks: `` `...` ``. - Raw(Rc), + Raw(Rc), /// Dollar signs surrounding inner contents. - Math(Rc), + Math(Rc), /// An identifier: `center`. Ident(EcoString), /// A boolean: `true`, `false`. @@ -613,29 +602,6 @@ pub enum NodeKind { Unknown(EcoString), } -/// Payload of a raw block node. 
-#[derive(Debug, Clone, PartialEq)] -pub struct RawData { - /// The raw text in the block. - pub text: EcoString, - /// The programming language of the raw text. - pub lang: Option, - /// The number of opening backticks. - pub backticks: u8, - /// Whether to display this as a block. - pub block: bool, -} - -/// Payload of a math formula node. -#[derive(Debug, Clone, PartialEq)] -pub struct MathData { - /// The formula between the dollars / brackets. - pub formula: EcoString, - /// Whether the formula is display-level, that is, it is surrounded by - /// `$[..]$`. - pub display: bool, -} - /// Where in a node an error should be annotated. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum ErrorPos { @@ -730,7 +696,6 @@ impl NodeKind { Self::Enum => "enumeration item", Self::EnumNumbering(_) => "enumeration item numbering", Self::List => "list item", - Self::ListBullet => "list bullet", Self::Raw(_) => "raw block", Self::Math(_) => "math formula", Self::Ident(_) => "identifier", diff --git a/src/syntax/pretty.rs b/src/syntax/pretty.rs index 9e4510b62..c453fb563 100644 --- a/src/syntax/pretty.rs +++ b/src/syntax/pretty.rs @@ -63,7 +63,6 @@ impl Printer { write_item(item, self); count += 1; } - count } @@ -99,6 +98,7 @@ impl Pretty for MarkupNode { Self::Emph => p.push('_'), Self::Text(text) => p.push_str(text), Self::Raw(raw) => raw.pretty(p), + Self::Math(math) => math.pretty(p), Self::Heading(heading) => heading.pretty(p), Self::List(list) => list.pretty(p), Self::Enum(enum_) => enum_.pretty(p), @@ -168,6 +168,20 @@ impl Pretty for RawNode { } } +impl Pretty for MathNode { + fn pretty(&self, p: &mut Printer) { + p.push('$'); + if self.display { + p.push('['); + } + p.push_str(&self.formula); + if self.display { + p.push(']'); + } + p.push('$'); + } +} + impl Pretty for HeadingNode { fn pretty(&self, p: &mut Printer) { for _ in 0 .. 
self.level() { @@ -253,12 +267,9 @@ impl Pretty for ArrayExpr { impl Pretty for DictExpr { fn pretty(&self, p: &mut Printer) { p.push('('); - - let mut items = self.items().peekable(); - if items.peek().is_none() { + let len = p.join(self.items(), ", ", |named, p| named.pretty(p)); + if len == 0 { p.push(':'); - } else { - p.join(items, ", ", |named, p| named.pretty(p)); } p.push(')'); } @@ -291,13 +302,11 @@ impl Pretty for GroupExpr { impl Pretty for BlockExpr { fn pretty(&self, p: &mut Printer) { p.push('{'); - - let exprs: Vec<_> = self.exprs().collect(); - if exprs.len() > 1 { + if self.exprs().count() > 1 { p.push(' '); } - p.join(&exprs, "; ", |expr, p| expr.pretty(p)); - if exprs.len() > 1 { + let len = p.join(self.exprs(), "; ", |expr, p| expr.pretty(p)); + if len > 1 { p.push(' '); } p.push('}'); @@ -348,17 +357,17 @@ impl Pretty for CallExpr { }; let args: Vec<_> = self.args().items().collect(); - - if let Some(Expr::Template(template)) = args - .last() - .and_then(|x| if let CallArg::Pos(arg) = x { Some(arg) } else { None }) - { - if args.len() > 1 { - write_args(&args[0 .. args.len() - 1]); + match args.as_slice() { + // This can be moved behind the arguments. + // + // Example: Transforms "#v(a, [b])" => "#v(a)[b]". 
+ [head @ .., CallArg::Pos(Expr::Template(template))] => { + if !head.is_empty() { + write_args(head); + } + template.pretty(p); } - template.pretty(p); - } else { - write_args(&args); + items => write_args(items), } } } @@ -423,12 +432,12 @@ impl Pretty for LetExpr { fn pretty(&self, p: &mut Printer) { p.push_str("let "); self.binding().pretty(p); - if let Some(Expr::Closure(closure)) = &self.init() { + if let Some(Expr::Closure(closure)) = self.init() { p.push('('); p.join(closure.params(), ", ", |item, p| item.pretty(p)); p.push_str(") = "); closure.body().pretty(p); - } else if let Some(init) = &self.init() { + } else if let Some(init) = self.init() { p.push_str(" = "); init.pretty(p); } @@ -441,7 +450,7 @@ impl Pretty for IfExpr { self.condition().pretty(p); p.push(' '); self.if_body().pretty(p); - if let Some(expr) = &self.else_body() { + if let Some(expr) = self.else_body() { p.push_str(" else "); expr.pretty(p); } @@ -525,7 +534,7 @@ mod tests { #[track_caller] fn test_parse(src: &str, expected: &str) { let source = SourceFile::detached(src); - let ast: Markup = source.ast().unwrap(); + let ast = source.ast().unwrap(); let found = pretty(&ast); if found != expected { println!("tree: {:#?}", ast); @@ -563,6 +572,11 @@ mod tests { test_parse("``` 1```", "`1`"); test_parse("``` 1 ```", "`1 `"); test_parse("```` ` ````", "``` ` ```"); + + // Math node. 
+ roundtrip("$$"); + roundtrip("$a+b$"); + roundtrip("$[ a^2 + b^2 = c^2 ]$"); } #[test] diff --git a/tests/ref/markup/math.png b/tests/ref/markup/math.png new file mode 100644 index 0000000000000000000000000000000000000000..426f3dbfb88fc52c5f4a2cb688131b56055371a5 GIT binary patch literal 2448 zcmaJ@c{CIX7avSxh~7+6NDPBY=A~((u`kao2`@{sWS1?&H(4H8!lWl+j5TReF+@a~ ztVN1vd5vX8S)MUdk-;EhXpH&#&UfBB-}~cz_uO;OJ@=e@?zzkF++W7e_E>pY6sC`rd~)>islgn5O2{S!>tsUD?Z|ExG^ zt1sPqb0EDOc9fA1+WiS;Y^m!QggvVB2i#Mbg#PR~?LET7j{KOMKAu;8=cD<*k|qU$ z0u*9C$a$^w<+>=oE|Moa%2*zW$FslfJjWCZ%?>4v4ElP!tAR3fu?D%Hlv^+cG&jZ( zdOv2Q=k_UFXq2ur4XRC6(O;Fz@9kl6^rn+w@6n79oz}BF)k=Q+phd=#8uu}=%$qKw zz^Z;+q!|F`Ia?AtEkJiGLQ_)Y`1>W&qpE|YtTUX$K}tweWKF*+%i^iEUj;!jM4!wW zC%+3D0;L-uu$l+^%D79=nBht%jW?NvcGsI{(ij^hr;5XO zH@in%h>6JTbex9gkDqEk_-cd}IMRv*jLtwwUoHKe6d2K<@|NDA_uWrZh3h4Wz`IlM zX4NB%ohfi|E&bg@lrHit?G3L6Tt3i0Jb}s$r~rlWL9f2oIjK{7>lw|(?U&!=hGEayF;i?S|1Ce`t%5Ec11QFUdNvfB!rI;wN_^h1a1@LvCB1!MuYU# z2uv9hQ1VPA8-2}yEGJ*q^N^VmGHh^}5HG#!6QQ4ZX-u`ZcA&v)IbSNU+5C{FOgo%41{@9lg!y$&=nNi4P^wCRa94pKH6!^*d-H2;3>E3r@? zpd|#zb%i(YBn(y1Ckm~}4PTok+z?@x7w_r)~5Pgd86Ek6!T z+AWC()g|rPk@MwAUiorhZ90{#yEU6xSGS;7v%vp|3}IBHaf)DLPS=G?4cZw0v($tB zJyJlC2Lxx7&#b<>*vRA>YDP6&zq+6Miv2~ruj}Ng`#Siv3^`Y^F@!c|T3ET%pg6=H z#xU80pnNUqfDWmYhmvOuXLHytud}p=fRv9NQ~D;+8FGVcfj(lz;hy~J!<+wvW*E3D z6RP!>?5Gka_5MDJ|IJ(}g4(;BF>NY3J89BZ82jS9K&}h2HqaV4zv4!TM7kcg0$(|8 zg;Pid`^L#Ls+z6e1_|rig=?}TZK)Nr)I?OE(ANG&vwW*dy1>oCJi|L3qG0QHIVwJ) zw4Lfu?wdh-d4D?k}>63vb(b1%=z$`d^VrJ^~4(Un+el8M? 
z*iuSJo_``xsLT_}0Hv_*n-W^s3rOc7_rMT-wYIWfHLJSC;oKdF-x=bqu3S*wi0-As z>L=$^U0`eJ0l!9^qZ!(#Y^EDTxrBC7RC5>hx|9SLs&bbx7oN_Rlcy^?;!a0KE0pTw8DBTk*Y0f z>7a=)1#`IY3f@}#z0)9dbMovPaY0-97g{#<+09B%+{O{>-V7k?HtF{4C~ZvUz-+OLkjGIY$t9cpIx#Fg9oBCM3FyJ>9jAGsvgo(Y zh;(!?N8M&lx7KCRBC)=73aohMZslw7C~k_rsXd7kqZ*`X~YH`;i@0Q zR@P1wj=rK?UR#;lXs@=rK>e`DiaAntl7@x)<<0o29ScI)Uj8k=&z+VFQk$Z>p32hK zT7Qbni&#5Scj`Kqo``t9YiFlpc(b$l9BGD}2ZL&g8W*LiHri%-|NmZB_5lAP cLE+m!&^W9H?V%2-|4=Qwjr~t`Rz9i!26O;~?f?J) literal 0 HcmV?d00001 diff --git a/tests/typ/code/array.typ b/tests/typ/code/array.typ index 44b8b5979..df37dd454 100644 --- a/tests/typ/code/array.typ +++ b/tests/typ/code/array.typ @@ -72,7 +72,7 @@ {(,1)} // Missing expression makes named pair incomplete, making this an empty array. -// Error: 3-5 expected expression, found named pair +// Error: 5 expected expression {(a:)} // Named pair after this is already identified as an array. diff --git a/tests/typ/code/call.typ b/tests/typ/code/call.typ index 95d75595d..2c16af1cf 100644 --- a/tests/typ/code/call.typ +++ b/tests/typ/code/call.typ @@ -72,7 +72,7 @@ // Error: 10-12 expected expression, found end of block comment #func(a:1*/) -// Error: 9 expected comma +// Error: 8 expected comma #func(1 2) // Error: 7-8 expected identifier diff --git a/tests/typ/code/dict.typ b/tests/typ/code/dict.typ index 757759aca..b369b8b65 100644 --- a/tests/typ/code/dict.typ +++ b/tests/typ/code/dict.typ @@ -42,7 +42,7 @@ // Identified as dictionary due to initial colon. // Error: 4-5 expected named pair, found expression -// Error: 6 expected comma +// Error: 5 expected comma // Error: 12-16 expected identifier // Error: 17-18 expected expression, found colon {(:1 b:"", true::)} diff --git a/tests/typ/markup/math.typ b/tests/typ/markup/math.typ new file mode 100644 index 000000000..cad01d107 --- /dev/null +++ b/tests/typ/markup/math.typ @@ -0,0 +1,12 @@ +// Test math formulas. + +--- +The sum of $a$ and $b$ is $a + b$. 
+ +--- +We will show that: +$[ a^2 + b^2 = c^2 ]$ + +--- +// Error: 2:1 expected closing bracket and dollar sign +$[a